xref: /freebsd/sys/vm/vm_object.c (revision 0ea3482342b4d7d6e71f3007ce4dafe445c639fd)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $Id: vm_object.c,v 1.54 1995/10/23 03:49:43 dyson Exp $
65  */
66 
67 /*
68  *	Virtual memory object module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/kernel.h>
74 #include <sys/proc.h>		/* for curproc, pageproc */
75 #include <sys/malloc.h>
76 #include <sys/vnode.h>
77 #include <sys/mount.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_pageout.h>
82 #include <vm/vm_pager.h>
83 #include <vm/swap_pager.h>
84 #include <vm/vm_kern.h>
85 
86 static void _vm_object_allocate(objtype_t, vm_size_t, vm_object_t);
87 
89 /*
90  *	Virtual memory objects maintain the actual data
91  *	associated with allocated virtual memory.  A given
92  *	page of memory exists within exactly one object.
93  *
94  *	An object is only deallocated when all "references"
95  *	are given up.  Only one "reference" to a given
96  *	region of an object should be writeable.
97  *
98  *	Associated with each object is a list of all resident
99  *	memory pages belonging to that object; this list is
100  *	maintained by the "vm_page" module, and locked by the object's
101  *	lock.
102  *
103  *	Each object also records a "pager" routine which is
 *	used to retrieve pages from (and store them to) the proper
 *	backing storage.  In addition, objects may be backed by other
106  *	objects from which they were virtual-copied.
107  *
 *	The only fields of the object structure that are
 *	modified after creation are:
110  *		reference count		locked by object's lock
111  *		pager routine		locked by object's lock
112  *
113  */
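
/*
 * A rough picture of how these objects are used: copy-on-write
 * produces chains of anonymous objects,
 *
 *	map entry -> object -> backing_object -> ...
 *
 * where a page lookup falls through to a backing object whenever a
 * front object has no copy of the page.  vm_object_collapse() below
 * keeps such chains from growing without bound.
 */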
114 
115 int vm_object_cache_max;
116 struct object_q vm_object_cached_list;
117 int vm_object_cached;
118 struct object_q vm_object_list;
119 long vm_object_count;
120 vm_object_t kernel_object;
121 vm_object_t kmem_object;
122 struct vm_object kernel_object_store;
123 struct vm_object kmem_object_store;
124 extern int vm_pageout_page_count;
125 
126 long object_collapses;
127 long object_bypasses;
128 
129 static void
130 _vm_object_allocate(type, size, object)
131 	objtype_t type;
132 	vm_size_t size;
133 	register vm_object_t object;
134 {
135 	TAILQ_INIT(&object->memq);
136 	TAILQ_INIT(&object->shadow_head);
137 
138 	object->type = type;
139 	object->size = size;
140 	object->ref_count = 1;
141 	object->flags = 0;
142 	object->paging_in_progress = 0;
143 	object->resident_page_count = 0;
144 	object->handle = NULL;
145 	object->paging_offset = 0;
146 	object->backing_object = NULL;
147 	object->backing_object_offset = (vm_offset_t) 0;
148 
149 	object->last_read = 0;
150 
151 	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
152 	vm_object_count++;
153 }
154 
155 /*
156  *	vm_object_init:
157  *
158  *	Initialize the VM objects module.
159  */
160 void
161 vm_object_init(vm_offset_t nothing)
162 {
165 	TAILQ_INIT(&vm_object_cached_list);
166 	TAILQ_INIT(&vm_object_list);
167 	vm_object_count = 0;
168 
169 	vm_object_cache_max = 84;
170 	if (cnt.v_page_count > 1000)
171 		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;
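	/*
	 * Worked example: with 16MB of 4K pages, v_page_count is
	 * roughly 4096, allowing about 84 + (4096 - 1000) / 4 = 858
	 * cached objects.
	 */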
172 
173 	kernel_object = &kernel_object_store;
174 	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
175 	    kernel_object);
176 
177 	kmem_object = &kmem_object_store;
178 	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
179 	    kmem_object);
180 }
181 
182 /*
183  *	vm_object_allocate:
184  *
185  *	Returns a new object with the given size.
186  */
187 
188 vm_object_t
189 vm_object_allocate(type, size)
190 	objtype_t type;
191 	vm_size_t size;
192 {
193 	register vm_object_t result;
194 
195 	result = (vm_object_t)
196 	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);
197 
199 	_vm_object_allocate(type, size, result);
200 
201 	return (result);
202 }
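
/*
 * Illustrative use (hypothetical caller):
 *
 *	object = vm_object_allocate(OBJT_DEFAULT, round_page(size));
 *
 * The new object is returned with a single reference, which the
 * caller eventually gives up through vm_object_deallocate().
 */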
203 
205 /*
206  *	vm_object_reference:
207  *
208  *	Gets another reference to the given object.
209  */
210 inline void
211 vm_object_reference(object)
212 	register vm_object_t object;
213 {
214 	if (object == NULL)
215 		return;
216 
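	/*
	 * A persistent object with no references sits on the cached
	 * object queue; referencing it again first takes it back off
	 * that queue.
	 */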
217 	if (object->ref_count == 0) {
218 		if ((object->flags & OBJ_CANPERSIST) == 0)
219 			panic("vm_object_reference: non-persistent object with 0 ref_count");
220 		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
221 		vm_object_cached--;
222 	}
223 	object->ref_count++;
224 }
225 
226 /*
227  *	vm_object_deallocate:
228  *
229  *	Release a reference to the specified object,
230  *	gained either through a vm_object_allocate
231  *	or a vm_object_reference call.  When all references
232  *	are gone, storage associated with this object
233  *	may be relinquished.
234  *
235  *	No object may be locked.
236  */
237 void
238 vm_object_deallocate(object)
239 	vm_object_t object;
240 {
241 	vm_object_t temp;
242 
243 	while (object != NULL) {
244 
245 		if (object->ref_count == 0)
246 			panic("vm_object_deallocate: object deallocated too many times");
247 
248 		/*
249 		 * Lose the reference
250 		 */
251 		object->ref_count--;
252 
253 		if (object->ref_count != 0) {
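		/*
		 * Optimization: if this was the next-to-last reference
		 * to an anonymous (default or swap backed) object that
		 * is shadowed by exactly one other such object, try to
		 * collapse the shadowing object into this one instead
		 * of leaving a two-object chain behind.
		 */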
254 			if ((object->ref_count == 1) &&
255 			    (object->handle == NULL) &&
256 			    (object->type == OBJT_DEFAULT ||
257 			     object->type == OBJT_SWAP)) {
258 				vm_object_t robject;
259 				robject = object->shadow_head.tqh_first;
260 				if ((robject != NULL) &&
261 				    (robject->handle == NULL) &&
262 				    (robject->type == OBJT_DEFAULT ||
263 				     robject->type == OBJT_SWAP)) {
264 					int s;
265 					robject->ref_count += 2;
266 					object->ref_count += 2;
267 
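					/*
					 * The temporary extra references
					 * pin both objects while we sleep
					 * below, so that neither can be
					 * torn down while their paging
					 * activity drains.
					 */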
268 					do {
269 						s = splhigh();
270 						while (robject->paging_in_progress) {
271 							robject->flags |= OBJ_PIPWNT;
272 							tsleep(robject, PVM, "objde1", 0);
273 						}
274 
275 						while (object->paging_in_progress) {
276 							object->flags |= OBJ_PIPWNT;
277 							tsleep(object, PVM, "objde2", 0);
278 						}
279 						splx(s);
280 
					} while (object->paging_in_progress || robject->paging_in_progress);
282 
283 					object->ref_count -= 2;
284 					robject->ref_count -= 2;
					if (robject->ref_count == 0) {
						robject->ref_count++;
287 						object = robject;
288 						continue;
289 					}
290 					vm_object_collapse(robject);
291 					return;
292 				}
293 			}
294 			/*
295 			 * If there are still references, then we are done.
296 			 */
297 			return;
298 		}
299 
300 		if (object->type == OBJT_VNODE) {
301 			struct vnode *vp = object->handle;
302 
303 			vp->v_flag &= ~VTEXT;
304 		}
305 
306 		/*
307 		 * See if this object can persist and has some resident
308 		 * pages.  If so, enter it in the cache.
309 		 */
310 		if (object->flags & OBJ_CANPERSIST) {
311 			if (object->resident_page_count != 0) {
				vm_object_page_clean(object, 0, 0, TRUE, TRUE);
313 				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
314 				    cached_list);
315 				vm_object_cached++;
316 
317 				vm_object_cache_trim();
318 				return;
319 			} else {
320 				object->flags &= ~OBJ_CANPERSIST;
321 			}
322 		}
323 
324 		/*
325 		 * Make sure no one uses us.
326 		 */
327 		object->flags |= OBJ_DEAD;
328 
329 		temp = object->backing_object;
330 		if (temp)
331 			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
332 		vm_object_terminate(object);
333 		/* unlocks and deallocates object */
334 		object = temp;
335 	}
336 }
337 
338 /*
339  *	vm_object_terminate actually destroys the specified object, freeing
340  *	up all previously used resources.
341  *
342  *	The object must be locked.
343  */
344 void
345 vm_object_terminate(object)
346 	register vm_object_t object;
347 {
	register vm_page_t p;
350 	int s;
351 
352 	/*
353 	 * wait for the pageout daemon to be done with the object
354 	 */
355 	s = splhigh();
356 	while (object->paging_in_progress) {
357 		object->flags |= OBJ_PIPWNT;
358 		tsleep(object, PVM, "objtrm", 0);
359 	}
360 	splx(s);
361 
362 	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");
364 
365 	/*
366 	 * Clean and free the pages, as appropriate. All references to the
367 	 * object are gone, so we don't need to lock it.
368 	 */
369 	if (object->type == OBJT_VNODE) {
370 		struct vnode *vp = object->handle;
371 
372 		VOP_LOCK(vp);
373 		vm_object_page_clean(object, 0, 0, TRUE, FALSE);
374 		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
375 		VOP_UNLOCK(vp);
376 	}
377 
378 	/*
379 	 * Now free the pages. For internal objects, this also removes them
380 	 * from paging queues.
381 	 */
382 	while ((p = object->memq.tqh_first) != NULL) {
383 		if (p->flags & PG_BUSY)
384 			printf("vm_object_terminate: freeing busy page\n");
385 		PAGE_WAKEUP(p);
386 		vm_page_free(p);
387 		cnt.v_pfree++;
388 	}
389 
390 	/*
391 	 * Let the pager know object is dead.
392 	 */
393 	vm_pager_deallocate(object);
394 
395 	TAILQ_REMOVE(&vm_object_list, object, object_list);
396 	vm_object_count--;
397 
398 	wakeup(object);
399 
400 	/*
401 	 * Free the space for the object.
402 	 */
403 	free((caddr_t) object, M_VMOBJ);
404 }
405 
406 /*
407  *	vm_object_page_clean
408  *
409  *	Clean all dirty pages in the specified range of object.
410  *	Leaves page on whatever queue it is currently on.
 *	Leaves each page on whatever queue it is currently on.
 *	Odd semantics: an "end" of 0 means clean to the end of
 *	the object.
413  *
414  *	The object must be locked.
415  */
416 
417 void
418 vm_object_page_clean(object, start, end, syncio, lockflag)
419 	vm_object_t object;
420 	vm_offset_t start;
421 	vm_offset_t end;
422 	boolean_t syncio;
423 	boolean_t lockflag;
424 {
425 	register vm_page_t p;
426 	register vm_offset_t tstart, tend;
427 	int s;
428 	struct vnode *vp;
429 	int runlen;
430 	vm_page_t ma[vm_pageout_page_count];
431 
432 	if (object->type != OBJT_VNODE ||
433 		(object->flags & OBJ_MIGHTBEDIRTY) == 0)
434 		return;
435 
436 	vp = object->handle;
437 
438 	if (lockflag)
439 		VOP_LOCK(vp);
440 	object->flags |= OBJ_CLEANING;
441 
442 	if (start != end) {
443 		start = trunc_page(start);
444 		end = round_page(end);
445 	}
446 
447 startover:
448 	tstart = start;
449 	if (end == 0) {
450 		tend = object->size;
451 	} else {
452 		tend = end;
453 	}
454 	if (tstart == 0 && tend == object->size) {
455 		object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
456 	}
457 
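	/*
	 * Collect runs of consecutive dirty pages in ma[], handing
	 * each run to vm_pageout_flush() whenever the run breaks or
	 * reaches vm_pageout_page_count pages.
	 */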
458 	runlen = 0;
	for (; tstart < tend; tstart += PAGE_SIZE) {
460 relookup:
461 		p = vm_page_lookup(object, tstart);
462 		if (!p) {
463 			if (runlen > 0) {
464 				vm_pageout_flush(ma, runlen, syncio);
465 				runlen = 0;
466 			}
467 			continue;
468 		}
469 		if (p->valid == 0 || (p->flags & PG_CACHE)) {
470 			if (runlen > 0) {
471 				vm_pageout_flush(ma, runlen, syncio);
472 				runlen = 0;
473 			}
474 			continue;
475 		}
476 
477 		vm_page_protect(p, VM_PROT_READ);
478 
479 		s = splhigh();
480 		while ((p->flags & PG_BUSY) || p->busy) {
481 			if (runlen > 0) {
482 				splx(s);
483 				vm_pageout_flush(ma, runlen, syncio);
484 				runlen = 0;
485 				goto relookup;
486 			}
487 			p->flags |= PG_WANTED|PG_REFERENCED;
488 			tsleep(p, PVM, "vpcwai", 0);
489 			splx(s);
490 			goto relookup;
491 		}
492 		splx(s);
493 
494 		if (p->dirty == 0)
495 			vm_page_test_dirty(p);
496 
497 		if ((p->valid & p->dirty) != 0) {
498 			ma[runlen] = p;
499 			p->flags |= PG_BUSY;
500 			runlen++;
501 			if (runlen >= vm_pageout_page_count) {
502 				vm_pageout_flush(ma, runlen, syncio);
503 				runlen = 0;
504 			}
505 		} else if (runlen > 0) {
506 			vm_pageout_flush(ma, runlen, syncio);
507 			runlen = 0;
508 		}
509 
510 	}
511 	if (runlen > 0) {
512 		vm_pageout_flush(ma, runlen, syncio);
513 	}
514 
515 	VOP_FSYNC(vp, NULL, syncio, curproc);
516 
517 	if (lockflag)
518 		VOP_UNLOCK(vp);
519 	object->flags &= ~OBJ_CLEANING;
520 	return;
521 }
522 
523 /*
524  *	vm_object_deactivate_pages
525  *
526  *	Deactivate all pages in the specified object.  (Keep its pages
527  *	in memory even though it is no longer referenced.)
528  *
529  *	The object must be locked.
530  */
531 void
532 vm_object_deactivate_pages(object)
533 	register vm_object_t object;
534 {
535 	register vm_page_t p, next;
536 
537 	for (p = object->memq.tqh_first; p != NULL; p = next) {
538 		next = p->listq.tqe_next;
539 		vm_page_deactivate(p);
540 	}
541 }
542 
543 /*
544  *	Trim the object cache to size.
545  */
546 void
547 vm_object_cache_trim()
548 {
549 	register vm_object_t object;
550 
551 	while (vm_object_cached > vm_object_cache_max) {
552 		object = vm_object_cached_list.tqh_first;
553 
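		/*
		 * Referencing the object takes it off the cached list;
		 * pager_cache(object, FALSE) then drops that reference
		 * with persistence disabled, freeing the object.
		 */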
554 		vm_object_reference(object);
555 		pager_cache(object, FALSE);
556 	}
557 }
558 
560 /*
561  *	vm_object_pmap_copy:
562  *
563  *	Makes all physical pages in the specified
564  *	object range copy-on-write.  No writeable
565  *	references to these pages should remain.
566  *
567  *	The object must *not* be locked.
568  */
569 void
570 vm_object_pmap_copy(object, start, end)
571 	register vm_object_t object;
572 	register vm_offset_t start;
573 	register vm_offset_t end;
574 {
575 	register vm_page_t p;
576 
577 	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
578 		return;
579 
580 	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
581 		vm_page_protect(p, VM_PROT_READ);
582 	}
583 
584 	object->flags &= ~OBJ_WRITEABLE;
585 }
586 
587 /*
588  *	vm_object_pmap_remove:
589  *
590  *	Removes all physical pages in the specified
591  *	object range from all physical maps.
592  *
593  *	The object must *not* be locked.
594  */
595 void
596 vm_object_pmap_remove(object, start, end)
597 	register vm_object_t object;
598 	register vm_offset_t start;
599 	register vm_offset_t end;
600 {
601 	register vm_page_t p;
603 
604 	if (object == NULL)
605 		return;
606 	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
607 		vm_page_protect(p, VM_PROT_NONE);
608 	}
609 }
610 
611 /*
612  *	vm_object_copy:
613  *
 *	Set up a copy of an existing object.  In this implementation
 *	the copy is always deferred: the source object simply gains a
 *	reference and is returned as the destination, with
 *	*src_needs_copy set so that the caller arranges the actual
 *	copy (e.g. via vm_object_shadow) at the first write.
621  */
622 void
623 vm_object_copy(src_object, src_offset, size,
624     dst_object, dst_offset, src_needs_copy)
625 	register vm_object_t src_object;
626 	vm_offset_t src_offset;
627 	vm_size_t size;
628 	vm_object_t *dst_object;/* OUT */
629 	vm_offset_t *dst_offset;/* OUT */
630 	boolean_t *src_needs_copy;	/* OUT */
631 {
638 	if (src_object == NULL) {
639 		/*
640 		 * Nothing to copy
641 		 */
642 		*dst_object = NULL;
643 		*dst_offset = 0;
644 		*src_needs_copy = FALSE;
645 		return;
646 	}
647 
648 	/*
649 	 * Try to collapse the object before copying it.
650 	 */
651 	if (src_object->handle == NULL &&
652 	    (src_object->type == OBJT_DEFAULT ||
653 	     src_object->type == OBJT_SWAP))
654 		vm_object_collapse(src_object);
655 
657 	/*
658 	 * Make another reference to the object
659 	 */
660 	src_object->ref_count++;
661 
662 	*dst_object = src_object;
663 	*dst_offset = src_offset;
664 
665 	/*
666 	 * Must make a shadow when write is desired
667 	 */
668 	*src_needs_copy = TRUE;
669 	return;
670 }
671 
672 /*
673  *	vm_object_shadow:
674  *
675  *	Create a new object which is backed by the
676  *	specified existing object range.  The source
677  *	object reference is deallocated.
678  *
679  *	The new object and offset into that object
680  *	are returned in the source parameters.
681  */
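
/*
 * Illustrative use, patterned on the copy-on-write path in vm_map.c
 * (field names abbreviated for illustration):
 *
 *	vm_object_shadow(&entry->object.vm_object, &entry->offset,
 *	    (vm_size_t) (entry->end - entry->start));
 */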
682 
683 void
684 vm_object_shadow(object, offset, length)
685 	vm_object_t *object;	/* IN/OUT */
686 	vm_offset_t *offset;	/* IN/OUT */
687 	vm_size_t length;
688 {
689 	register vm_object_t source;
690 	register vm_object_t result;
691 
692 	source = *object;
693 
694 	/*
695 	 * Allocate a new object with the given length
696 	 */
697 
698 	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
699 		panic("vm_object_shadow: no object for shadowing");
700 
701 	/*
702 	 * The new object shadows the source object, adding a reference to it.
703 	 * Our caller changes his reference to point to the new object,
704 	 * removing a reference to the source object.  Net result: no change
705 	 * of reference count.
706 	 */
707 	result->backing_object = source;
708 	if (source)
709 		TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list);
710 
711 	/*
712 	 * Store the offset into the source object, and fix up the offset into
713 	 * the new object.
714 	 */
715 
716 	result->backing_object_offset = *offset;
717 
718 	/*
719 	 * Return the new things
720 	 */
721 
722 	*offset = 0;
723 	*object = result;
724 }
725 
727 /*
 * This version of collapse allows the operation to occur earlier and
 * while paging_in_progress is true for an object.  It is not a complete
 * operation, but should plug 99.9% of the remaining leaks.
731  */
732 static void
733 vm_object_qcollapse(object)
734 	register vm_object_t object;
735 {
736 	register vm_object_t backing_object;
737 	register vm_offset_t backing_offset, new_offset;
738 	register vm_page_t p, pp;
739 	register vm_size_t size;
740 
741 	backing_object = object->backing_object;
742 	if (backing_object->ref_count != 1)
743 		return;
744 
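	/*
	 * Take extra references so that backing_object cannot be
	 * destroyed out from under us while we free or move its
	 * pages below.
	 */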
745 	backing_object->ref_count += 2;
746 
747 	backing_offset = object->backing_object_offset;
748 	size = object->size;
749 	p = backing_object->memq.tqh_first;
750 	while (p) {
751 		vm_page_t next;
752 
753 		next = p->listq.tqe_next;
754 		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
755 		    !p->valid || p->hold_count || p->wire_count || p->busy) {
756 			p = next;
757 			continue;
758 		}
759 		vm_page_protect(p, VM_PROT_NONE);
760 		new_offset = (p->offset - backing_offset);
761 		if (p->offset < backing_offset ||
762 		    new_offset >= size) {
763 			if (backing_object->type == OBJT_SWAP)
764 				swap_pager_freespace(backing_object,
765 				    backing_object->paging_offset + p->offset, PAGE_SIZE);
766 			vm_page_free(p);
767 		} else {
768 			pp = vm_page_lookup(object, new_offset);
769 			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
770 				    object->paging_offset + new_offset, NULL, NULL))) {
771 				if (backing_object->type == OBJT_SWAP)
772 					swap_pager_freespace(backing_object,
773 					    backing_object->paging_offset + p->offset, PAGE_SIZE);
774 				vm_page_free(p);
775 			} else {
776 				if (backing_object->type == OBJT_SWAP)
777 					swap_pager_freespace(backing_object,
778 					    backing_object->paging_offset + p->offset, PAGE_SIZE);
779 				vm_page_rename(p, object, new_offset);
780 				p->dirty = VM_PAGE_BITS_ALL;
781 			}
782 		}
783 		p = next;
784 	}
785 	backing_object->ref_count -= 2;
786 }
787 
788 /*
789  *	vm_object_collapse:
790  *
791  *	Collapse an object with the object backing it.
792  *	Pages in the backing object are moved into the
793  *	parent, and the backing object is deallocated.
794  */
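
/*
 * Illustration: if object holds the only reference to backing_object,
 * the chain
 *
 *	object -> backing_object -> deeper_object
 *
 * becomes
 *
 *	object -> deeper_object
 *
 * with backing_object's pages and pager absorbed into object.  When
 * other references exist, object may still "bypass" backing_object
 * if every relevant page is already shadowed.
 */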
795 void
796 vm_object_collapse(object)
797 	vm_object_t object;
799 {
800 	vm_object_t backing_object;
801 	vm_offset_t backing_offset;
802 	vm_size_t size;
803 	vm_offset_t new_offset;
804 	vm_page_t p, pp;
805 
806 	while (TRUE) {
807 		/*
808 		 * Verify that the conditions are right for collapse:
809 		 *
810 		 * The object exists and no pages in it are currently being paged
811 		 * out.
812 		 */
813 		if (object == NULL)
814 			return;
815 
816 		/*
817 		 * Make sure there is a backing object.
818 		 */
819 		if ((backing_object = object->backing_object) == NULL)
820 			return;
821 
822 		/*
		 * We check the backing object first, because it is most likely
		 * not collapsible.
825 		 */
826 		if (backing_object->handle != NULL ||
827 		    (backing_object->type != OBJT_DEFAULT &&
828 		     backing_object->type != OBJT_SWAP) ||
829 		    (backing_object->flags & OBJ_DEAD) ||
830 		    object->handle != NULL ||
831 		    (object->type != OBJT_DEFAULT &&
832 		     object->type != OBJT_SWAP) ||
833 		    (object->flags & OBJ_DEAD)) {
834 			return;
835 		}
836 
837 		if (object->paging_in_progress != 0 ||
838 		    backing_object->paging_in_progress != 0) {
839 			vm_object_qcollapse(object);
840 			return;
841 		}
842 
843 		/*
844 		 * We know that we can either collapse the backing object (if
845 		 * the parent is the only reference to it) or (perhaps) remove
846 		 * the parent's reference to it.
847 		 */
848 
849 		backing_offset = object->backing_object_offset;
850 		size = object->size;
851 
852 		/*
853 		 * If there is exactly one reference to the backing object, we
854 		 * can collapse it into the parent.
855 		 */
856 
857 		if (backing_object->ref_count == 1) {
858 
859 			backing_object->flags |= OBJ_DEAD;
860 			/*
861 			 * We can collapse the backing object.
862 			 *
863 			 * Move all in-memory pages from backing_object to the
864 			 * parent.  Pages that have been paged out will be
865 			 * overwritten by any of the parent's pages that
866 			 * shadow them.
867 			 */
868 
869 			while ((p = backing_object->memq.tqh_first) != 0) {
870 
871 				new_offset = (p->offset - backing_offset);
872 
873 				/*
874 				 * If the parent has a page here, or if this
875 				 * page falls outside the parent, dispose of
876 				 * it.
877 				 *
878 				 * Otherwise, move it as planned.
879 				 */
880 
881 				if (p->offset < backing_offset ||
882 				    new_offset >= size) {
883 					vm_page_protect(p, VM_PROT_NONE);
884 					PAGE_WAKEUP(p);
885 					vm_page_free(p);
886 				} else {
887 					pp = vm_page_lookup(object, new_offset);
888 					if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
889 					    object->paging_offset + new_offset, NULL, NULL))) {
890 						vm_page_protect(p, VM_PROT_NONE);
891 						PAGE_WAKEUP(p);
892 						vm_page_free(p);
893 					} else {
894 						vm_page_rename(p, object, new_offset);
895 					}
896 				}
897 			}
898 
899 			/*
900 			 * Move the pager from backing_object to object.
901 			 */
902 
903 			if (backing_object->type == OBJT_SWAP) {
904 				backing_object->paging_in_progress++;
905 				if (object->type == OBJT_SWAP) {
906 					object->paging_in_progress++;
907 					/*
908 					 * copy shadow object pages into ours
909 					 * and destroy unneeded pages in
910 					 * shadow object.
911 					 */
912 					swap_pager_copy(
913 					    backing_object, backing_object->paging_offset,
914 					    object, object->paging_offset,
915 					    object->backing_object_offset);
916 					vm_object_pip_wakeup(object);
917 				} else {
918 					object->paging_in_progress++;
919 					/*
920 					 * move the shadow backing_object's pager data to
921 					 * "object" and convert "object" type to OBJT_SWAP.
922 					 */
923 					object->type = OBJT_SWAP;
924 					object->un_pager.swp.swp_nblocks =
925 					    backing_object->un_pager.swp.swp_nblocks;
926 					object->un_pager.swp.swp_allocsize =
927 					    backing_object->un_pager.swp.swp_allocsize;
928 					object->un_pager.swp.swp_blocks =
929 					    backing_object->un_pager.swp.swp_blocks;
930 					object->un_pager.swp.swp_poip =		/* XXX */
931 					    backing_object->un_pager.swp.swp_poip;
932 					object->paging_offset = backing_object->paging_offset + backing_offset;
933 					TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
934 
935 					/*
936 					 * Convert backing object from OBJT_SWAP to
937 					 * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is
938 					 * actually necessary.
939 					 */
940 					backing_object->type = OBJT_DEFAULT;
941 					TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
942 					/*
943 					 * free unnecessary blocks
944 					 */
945 					swap_pager_freespace(object, 0, object->paging_offset);
946 					vm_object_pip_wakeup(object);
947 				}
948 
949 				vm_object_pip_wakeup(backing_object);
950 			}
951 			/*
952 			 * Object now shadows whatever backing_object did.
953 			 * Note that the reference to backing_object->backing_object
954 			 * moves from within backing_object to within object.
955 			 */
956 
957 			TAILQ_REMOVE(&object->backing_object->shadow_head, object,
958 			    shadow_list);
959 			if (backing_object->backing_object)
960 				TAILQ_REMOVE(&backing_object->backing_object->shadow_head,
961 				    backing_object, shadow_list);
962 			object->backing_object = backing_object->backing_object;
963 			if (object->backing_object)
964 				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
965 				    object, shadow_list);
966 
967 			object->backing_object_offset += backing_object->backing_object_offset;
968 			/*
969 			 * Discard backing_object.
970 			 *
971 			 * Since the backing object has no pages, no pager left,
972 			 * and no object references within it, all that is
973 			 * necessary is to dispose of it.
974 			 */
975 
976 			TAILQ_REMOVE(&vm_object_list, backing_object,
977 			    object_list);
978 			vm_object_count--;
979 
980 			free((caddr_t) backing_object, M_VMOBJ);
981 
982 			object_collapses++;
983 		} else {
984 			/*
985 			 * If all of the pages in the backing object are
986 			 * shadowed by the parent object, the parent object no
987 			 * longer has to shadow the backing object; it can
988 			 * shadow the next one in the chain.
989 			 *
990 			 * The backing object must not be paged out - we'd have
991 			 * to check all of the paged-out pages, as well.
992 			 */
993 
994 			if (backing_object->type != OBJT_DEFAULT) {
995 				return;
996 			}
997 			/*
998 			 * Should have a check for a 'small' number of pages
999 			 * here.
1000 			 */
1001 
1002 			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
1003 				new_offset = (p->offset - backing_offset);
1004 
1005 				/*
1006 				 * If the parent has a page here, or if this
1007 				 * page falls outside the parent, keep going.
1008 				 *
1009 				 * Otherwise, the backing_object must be left in
1010 				 * the chain.
1011 				 */
1012 
				if (p->offset >= backing_offset && new_offset < size) {
1014 
1015 					pp = vm_page_lookup(object, new_offset);
1016 
1017 					if ((pp == NULL || pp->valid == 0) &&
1018 				   	    !vm_pager_has_page(object, object->paging_offset + new_offset, NULL, NULL)) {
1019 
1020 						/*
1021 						 * Page still needed. Can't go any
1022 						 * further.
1023 						 */
1024 						return;
1025 					}
1026 				}
1027 			}
1028 
1029 			/*
1030 			 * Make the parent shadow the next object in the
1031 			 * chain.  Deallocating backing_object will not remove
1032 			 * it, since its reference count is at least 2.
1033 			 */
1034 
1035 			TAILQ_REMOVE(&object->backing_object->shadow_head,
1036 			    object, shadow_list);
1037 			vm_object_reference(object->backing_object = backing_object->backing_object);
1038 			if (object->backing_object)
1039 				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
1040 				    object, shadow_list);
1041 			object->backing_object_offset += backing_object->backing_object_offset;
1042 
1043 			/*
1044 			 * Drop the reference count on backing_object. Since
1045 			 * its ref_count was at least 2, it will not vanish;
1046 			 * so we don't need to call vm_object_deallocate.
1047 			 */
1048 			if (backing_object->ref_count == 1)
				printf("vm_object_collapse: should have called vm_object_deallocate\n");
1050 			backing_object->ref_count--;
1051 
1052 			object_bypasses++;
1053 
1054 		}
1055 
1056 		/*
1057 		 * Try again with this object's new backing object.
1058 		 */
1059 	}
1060 }
1061 
1062 /*
1063  *	vm_object_page_remove: [internal]
1064  *
1065  *	Removes all physical pages in the specified
1066  *	object range from the object's list of pages.
1067  *
1068  *	The object must be locked.
1069  */
1070 void
1071 vm_object_page_remove(object, start, end, clean_only)
1072 	register vm_object_t object;
1073 	register vm_offset_t start;
1074 	register vm_offset_t end;
1075 	boolean_t clean_only;
1076 {
1077 	register vm_page_t p, next;
1078 	vm_offset_t size;
1079 	int s;
1080 
1081 	if (object == NULL)
1082 		return;
1083 
1084 	object->paging_in_progress++;
1085 	start = trunc_page(start);
1086 	end = round_page(end);
1087 again:
1088 	size = end - start;
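	/*
	 * Heuristic: for a large range it is cheaper to walk the
	 * object's resident page list once; for a small range,
	 * per-offset hash lookups win.
	 */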
1089 	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
1090 		for (p = object->memq.tqh_first; p != NULL; p = next) {
1091 			next = p->listq.tqe_next;
1092 			if ((start <= p->offset) && (p->offset < end)) {
1093 				s = splhigh();
1094 				if (p->bmapped) {
1095 					splx(s);
1096 					continue;
1097 				}
1098 				if ((p->flags & PG_BUSY) || p->busy) {
1099 					p->flags |= PG_WANTED;
1100 					tsleep(p, PVM, "vmopar", 0);
1101 					splx(s);
1102 					goto again;
1103 				}
1104 				splx(s);
1105 				if (clean_only) {
1106 					vm_page_test_dirty(p);
1107 					if (p->valid & p->dirty)
1108 						continue;
1109 				}
1110 				vm_page_protect(p, VM_PROT_NONE);
1111 				PAGE_WAKEUP(p);
1112 				vm_page_free(p);
1113 			}
1114 		}
1115 	} else {
1116 		while (size > 0) {
1117 			while ((p = vm_page_lookup(object, start)) != 0) {
1118 				s = splhigh();
1119 				if (p->bmapped) {
1120 					splx(s);
1121 					break;
1122 				}
1123 				if ((p->flags & PG_BUSY) || p->busy) {
1124 					p->flags |= PG_WANTED;
1125 					tsleep(p, PVM, "vmopar", 0);
1126 					splx(s);
1127 					goto again;
1128 				}
1129 				splx(s);
1130 				if (clean_only) {
1131 					vm_page_test_dirty(p);
1132 					if (p->valid & p->dirty)
						break;
1134 				}
1135 				vm_page_protect(p, VM_PROT_NONE);
1136 				PAGE_WAKEUP(p);
1137 				vm_page_free(p);
1138 			}
1139 			start += PAGE_SIZE;
1140 			size -= PAGE_SIZE;
1141 		}
1142 	}
1143 	vm_object_pip_wakeup(object);
1144 }
1145 
1146 /*
1147  *	Routine:	vm_object_coalesce
1148  *	Function:	Coalesces two objects backing up adjoining
1149  *			regions of memory into a single object.
1150  *
1151  *	returns TRUE if objects were combined.
1152  *
1153  *	NOTE:	Only works at the moment if the second object is NULL -
1154  *		if it's not, which object do we lock first?
1155  *
1156  *	Parameters:
1157  *		prev_object	First object to coalesce
1158  *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
1160  *		next_offset	Offset into next_object
1161  *
1162  *		prev_size	Size of reference to prev_object
1163  *		next_size	Size of reference to next_object
1164  *
1165  *	Conditions:
1166  *	The object must *not* be locked.
1167  */
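
/*
 * Illustrative case: growing an anonymous mapping in place.  When
 * prev_object is a sole-referenced OBJT_DEFAULT object, the range
 * just beyond it is scrubbed of stale pages and the object is simply
 * extended, avoiding a second object entirely.
 */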
1168 boolean_t
1169 vm_object_coalesce(prev_object, next_object,
1170     prev_offset, next_offset,
1171     prev_size, next_size)
1172 	register vm_object_t prev_object;
1173 	vm_object_t next_object;
1174 	vm_offset_t prev_offset, next_offset;
1175 	vm_size_t prev_size, next_size;
1176 {
1177 	vm_size_t newsize;
1178 
1179 	if (next_object != NULL) {
1180 		return (FALSE);
1181 	}
1182 	if (prev_object == NULL) {
1183 		return (TRUE);
1184 	}
1185 
1186 	/*
1187 	 * Try to collapse the object first
1188 	 */
1189 	vm_object_collapse(prev_object);
1190 
1191 	/*
	 * Can't coalesce if:
	 *  . more than one reference
	 *  . paged out
	 *  . shadows another object
	 *  . has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry
	 * may be in use anyway)
1195 	 */
1196 
1197 	if (prev_object->ref_count > 1 ||
1198 	    prev_object->type != OBJT_DEFAULT ||
1199 	    prev_object->backing_object != NULL) {
1200 		return (FALSE);
1201 	}
1202 	/*
1203 	 * Remove any pages that may still be in the object from a previous
1204 	 * deallocation.
1205 	 */
1206 
1207 	vm_object_page_remove(prev_object,
1208 	    prev_offset + prev_size,
1209 	    prev_offset + prev_size + next_size, FALSE);
1210 
1211 	/*
1212 	 * Extend the object if necessary.
1213 	 */
1214 	newsize = prev_offset + prev_size + next_size;
1215 	if (newsize > prev_object->size)
1216 		prev_object->size = newsize;
1217 
1218 	return (TRUE);
1219 }
1220 
1221 /*
1222  * returns page after looking up in shadow chain
1223  */
1224 
1225 vm_page_t
1226 vm_object_page_lookup(object, offset)
1227 	vm_object_t object;
1228 	vm_offset_t offset;
1229 {
1230 	vm_page_t m;
1231 
1232 	if (!(m = vm_page_lookup(object, offset))) {
1233 		if (!object->backing_object)
1234 			return 0;
1235 		else
			return vm_object_page_lookup(object->backing_object,
			    offset + object->backing_object_offset);
1237 	}
1238 	return m;
1239 }
1240 
1241 #ifdef DDB
1242 
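/*
 * Debugger helper: returns 1 if "object" backs the given map entry
 * (directly or through its shadow chain) or, when entry is 0, any
 * entry of "map".  Recurses into sub maps and share maps.
 */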
1243 int
1244 _vm_object_in_map(map, object, entry)
1245 	vm_map_t map;
1246 	vm_object_t object;
1247 	vm_map_entry_t entry;
1248 {
1249 	vm_map_t tmpm;
1250 	vm_map_entry_t tmpe;
1251 	vm_object_t obj;
1252 	int entcount;
1253 
1254 	if (map == 0)
1255 		return 0;
1256 
1257 	if (entry == 0) {
1258 		tmpe = map->header.next;
1259 		entcount = map->nentries;
1260 		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
1262 				return 1;
1263 			}
1264 			tmpe = tmpe->next;
1265 		}
1266 	} else if (entry->is_sub_map || entry->is_a_map) {
1267 		tmpm = entry->object.share_map;
1268 		tmpe = tmpm->header.next;
1269 		entcount = tmpm->nentries;
1270 		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
1272 				return 1;
1273 			}
1274 			tmpe = tmpe->next;
1275 		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
1279 				return 1;
1280 			}
1281 	}
1282 	return 0;
1283 }
1284 
1285 int
1286 vm_object_in_map( object)
1287 	vm_object_t object;
1288 {
1289 	struct proc *p;
1290 	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1292 			continue;
1293 /*
1294 		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1295 			continue;
1296 		}
1297 */
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
1299 			return 1;
1300 	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(io_map, object, 0))
		return 1;
	if (_vm_object_in_map(phys_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	if (_vm_object_in_map(u_map, object, 0))
		return 1;
1317 	return 0;
1318 }
1319 
1321 void
vm_object_check()
{
	vm_object_t object;
1326 
1327 	/*
1328 	 * make sure that internal objs are in a map somewhere
1329 	 * and none have zero ref counts.
1330 	 */
1331 	for (object = vm_object_list.tqh_first;
1332 			object != NULL;
1333 			object = object->object_list.tqe_next) {
1334 		if (object->handle == NULL &&
1335 		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1336 			if (object->ref_count == 0) {
				printf("vmochk: internal obj has zero ref count: size %d\n",
					object->size);
1339 			}
1340 			if (!vm_object_in_map(object)) {
				printf("vmochk: internal obj is not in a map: ref: %d, size: %d, object: 0x%x, backing_object: 0x%x\n",
				    object->ref_count, object->size,
				    (int) object, (int) object->backing_object);
1343 			}
1344 		}
1345 	}
1346 }
1347 
1348 /*
1349  *	vm_object_print:	[ debug ]
1350  */
1351 void
1352 vm_object_print(iobject, full, dummy3, dummy4)
1353 	/* db_expr_t */ int iobject;
1354 	boolean_t full;
1355 	/* db_expr_t */ int dummy3;
1356 	char *dummy4;
1357 {
1358 	vm_object_t object = (vm_object_t)iobject;	/* XXX */
1359 	register vm_page_t p;
1360 
1361 	register int count;
1362 
1363 	if (object == NULL)
1364 		return;
1365 
1366 	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
1367 	    (int) object, (int) object->size,
1368 	    object->resident_page_count, object->ref_count);
1369 	printf("offset=0x%x, backing_object=(0x%x)+0x%x\n",
1370 	    (int) object->paging_offset,
1371 	    (int) object->backing_object, (int) object->backing_object_offset);
1372 	printf("cache: next=%p, prev=%p\n",
1373 	    object->cached_list.tqe_next, object->cached_list.tqe_prev);
1374 
1375 	if (!full)
1376 		return;
1377 
1378 	indent += 2;
1379 	count = 0;
1380 	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
1381 		if (count == 0)
1382 			iprintf("memory:=");
1383 		else if (count == 6) {
1384 			printf("\n");
1385 			iprintf(" ...");
1386 			count = 0;
1387 		} else
1388 			printf(",");
1389 		count++;
1390 
1391 		printf("(off=0x%lx,page=0x%lx)",
1392 		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
1393 	}
1394 	if (count != 0)
1395 		printf("\n");
1396 	indent -= 2;
1397 }
1398 #endif /* DDB */
1399