/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_object.c,v 1.66 1996/03/28 04:53:26 dyson Exp $
 */

/*
 * Virtual memory object module.
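 *
 * This module implements allocation and reference counting of VM
 * objects, the object cache, shadow (copy-on-write) chains, object
 * collapse and bypass, and the clean/remove operations on an object's
 * resident pages.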
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifdef DDB
static void	DDB_vm_object_check __P((void));
#endif

static void	_vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t));
#ifdef DDB
static int	_vm_object_in_map __P((vm_map_t map, vm_object_t object,
				       vm_map_entry_t entry));
static int	vm_object_in_map __P((vm_object_t object));
#endif
static void	vm_object_qcollapse __P((vm_object_t object));
#ifdef not_used
static void	vm_object_deactivate_pages __P((vm_object_t));
#endif
static void	vm_object_terminate __P((vm_object_t));
static void	vm_object_cache_trim __P((void));

/*
 * Virtual memory objects maintain the actual data
 * associated with allocated virtual memory.  A given
 * page of memory exists within exactly one object.
 *
 * An object is only deallocated when all "references"
 * are given up.  Only one "reference" to a given
 * region of an object should be writeable.
 *
 * Associated with each object is a list of all resident
 * memory pages belonging to that object; this list is
 * maintained by the "vm_page" module, and locked by the object's
 * lock.
 *
 * Each object also records a "pager" routine which is
 * used to retrieve (and store) pages to the proper backing
 * storage.  In addition, objects may be backed by other
 * objects from which they were virtual-copied.
 *
 * The only items within the object structure which are
 * modified after time of creation are:
 *	reference count		locked by object's lock
 *	pager routine		locked by object's lock
 *
 */

int vm_object_cache_max;
struct object_q vm_object_cached_list;
static int vm_object_cached;
struct object_q vm_object_list;
static long vm_object_count;
vm_object_t kernel_object;
vm_object_t kmem_object;
static struct vm_object kernel_object_store;
static struct vm_object kmem_object_store;
extern int vm_pageout_page_count;

static long object_collapses;
static long object_bypasses;

static void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	register vm_object_t object;
{
	TAILQ_INIT(&object->memq);
	TAILQ_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->shadow_count = 0;
	object->handle = NULL;
	object->paging_offset = (vm_ooffset_t) 0;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;

	object->last_read = 0;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
}

/*
 * vm_object_init:
 *
 * Initialize the VM objects module.
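 *
 * Called once during startup: it initializes the object lists, sizes
 * the object cache from the system page count, and sets up the
 * statically allocated kernel and kmem objects.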
 */
void
vm_object_init()
{
	TAILQ_INIT(&vm_object_cached_list);
	TAILQ_INIT(&vm_object_list);
	vm_object_count = 0;

	vm_object_cache_max = 84;
	if (cnt.v_page_count > 1000)
		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kmem_object);
}

/*
 * vm_object_allocate:
 *
 * Returns a new object with the given size.
 */

vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	register vm_object_t result;

	result = (vm_object_t)
	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);

	_vm_object_allocate(type, size, result);

	return (result);
}


/*
 * vm_object_reference:
 *
 * Gets another reference to the given object.
 */
inline void
vm_object_reference(object)
	register vm_object_t object;
{
	if (object == NULL)
		return;

	if (object->ref_count == 0) {
		if ((object->flags & OBJ_CANPERSIST) == 0)
			panic("vm_object_reference: non-persistent object with 0 ref_count");
		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
		vm_object_cached--;
	}
	object->ref_count++;
}

/*
 * vm_object_deallocate:
 *
 * Release a reference to the specified object,
 * gained either through a vm_object_allocate
 * or a vm_object_reference call.  When all references
 * are gone, storage associated with this object
 * may be relinquished.
 *
 * No object may be locked.
 */
void
vm_object_deallocate(object)
	vm_object_t object;
{
	vm_object_t temp;
	vm_page_t p;

	while (object != NULL) {

		if (object->ref_count == 0)
			panic("vm_object_deallocate: object deallocated too many times");

		/*
		 * Lose the reference
		 */
		object->ref_count--;
		if (object->ref_count != 0) {
			if ((object->ref_count == 1) &&
			    (object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;
				robject = object->shadow_head.tqh_first;
				if ((robject != NULL) &&
				    (robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {
					int s;
					robject->ref_count += 2;
					object->ref_count += 2;

					do {
						s = splhigh();
						while (robject->paging_in_progress) {
							robject->flags |= OBJ_PIPWNT;
							tsleep(robject, PVM, "objde1", 0);
						}

						while (object->paging_in_progress) {
							object->flags |= OBJ_PIPWNT;
							tsleep(object, PVM, "objde2", 0);
						}
						splx(s);

					} while (object->paging_in_progress || robject->paging_in_progress);

					object->ref_count -= 2;
					robject->ref_count -= 2;
					if (robject->ref_count == 0) {
						robject->ref_count += 1;
						object = robject;
						continue;
					}
					vm_object_collapse(robject);
					return;
				}
			}
			/*
			 * If there are still references, then we are done.
			 */
			return;
		}

		if (object->type == OBJT_VNODE) {
			struct vnode *vp = object->handle;

			vp->v_flag &= ~VTEXT;
		}

		/*
		 * See if this object can persist and has some resident
		 * pages.  If so, enter it in the cache.
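		 * (A cached object keeps its resident pages; it can be
		 * revived cheaply by vm_object_reference(), and the size of
		 * the cache is bounded by vm_object_cache_trim().)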
		 */
		if (object->flags & OBJ_CANPERSIST) {
			if (object->resident_page_count != 0) {
				vm_object_page_clean(object, 0, 0, TRUE, TRUE);
				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
				    cached_list);
				vm_object_cached++;

				vm_object_cache_trim();
				return;
			} else {
				object->flags &= ~OBJ_CANPERSIST;
			}
		}

		/*
		 * Make sure no one uses us.
		 */
		object->flags |= OBJ_DEAD;

		temp = object->backing_object;
		if (temp) {
			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
			--temp->shadow_count;
		}
		vm_object_terminate(object);
		/* unlocks and deallocates object */
		object = temp;
	}
}

/*
 * vm_object_terminate actually destroys the specified object, freeing
 * up all previously used resources.
 *
 * The object must be locked.
 */
static void
vm_object_terminate(object)
	register vm_object_t object;
{
	register vm_page_t p;
	int s;

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	s = splhigh();
	while (object->paging_in_progress) {
		object->flags |= OBJ_PIPWNT;
		tsleep(object, PVM, "objtrm", 0);
	}
	splx(s);

	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");

	/*
	 * Clean and free the pages, as appropriate.  All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp = object->handle;

		VOP_LOCK(vp);
		vm_object_page_clean(object, 0, 0, TRUE, FALSE);
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
		VOP_UNLOCK(vp);
	}

	/*
	 * Now free the pages.  For internal objects, this also removes them
	 * from paging queues.
	 */
	while ((p = object->memq.tqh_first) != NULL) {
		if (p->flags & PG_BUSY)
			printf("vm_object_terminate: freeing busy page\n");
		PAGE_WAKEUP(p);
		vm_page_free(p);
		cnt.v_pfree++;
	}

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	TAILQ_REMOVE(&vm_object_list, object, object_list);
	vm_object_count--;

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	free((caddr_t) object, M_VMOBJ);
}

/*
 * vm_object_page_clean
 *
 * Clean all dirty pages in the specified range of object.
 * Leaves page on whatever queue it is currently on.
 *
 * Odd semantics: if start == end, we clean everything.
 *
 * The object must be locked.
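 *
 * Dirty pages are gathered into runs of up to vm_pageout_page_count
 * neighboring pages (looking forward and backward from each dirty page
 * found) and passed to vm_pageout_flush() as a single cluster.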
 */

void
vm_object_page_clean(object, start, end, syncio, lockflag)
	vm_object_t object;
	vm_pindex_t start;
	vm_pindex_t end;
	boolean_t syncio;
	boolean_t lockflag;
{
	register vm_page_t p, np, tp;
	register vm_offset_t tstart, tend;
	vm_pindex_t pi;
	int s;
	struct vnode *vp;
	int runlen;
	int maxf;
	int chkb;
	int maxb;
	int i;
	vm_page_t maf[vm_pageout_page_count];
	vm_page_t mab[vm_pageout_page_count];
	vm_page_t ma[vm_pageout_page_count];

	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_MIGHTBEDIRTY) == 0)
		return;

	vp = object->handle;

	if (lockflag)
		VOP_LOCK(vp);
	object->flags |= OBJ_CLEANING;

	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}
	if ((tstart == 0) && (tend == object->size)) {
		object->flags &= ~(OBJ_WRITEABLE | OBJ_MIGHTBEDIRTY);
	}
	for (p = object->memq.tqh_first; p; p = p->listq.tqe_next)
		p->flags |= PG_CLEANCHK;

rescan:
	for (p = object->memq.tqh_first; p; p = np) {
		np = p->listq.tqe_next;

		pi = p->pindex;
		if (((p->flags & PG_CLEANCHK) == 0) ||
		    (pi < tstart) || (pi >= tend) ||
		    (p->valid == 0) || (p->queue == PQ_CACHE)) {
			p->flags &= ~PG_CLEANCHK;
			continue;
		}

		vm_page_test_dirty(p);
		if ((p->dirty & p->valid) == 0) {
			p->flags &= ~PG_CLEANCHK;
			continue;
		}

		s = splhigh();
		if ((p->flags & PG_BUSY) || p->busy) {
			p->flags |= PG_WANTED | PG_REFERENCED;
			tsleep(p, PVM, "vpcwai", 0);
			splx(s);
			goto rescan;
		}
		splx(s);

		maxf = 0;
		for (i = 1; i < vm_pageout_page_count; i++) {
			if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
				if ((tp->flags & PG_BUSY) ||
				    (tp->flags & PG_CLEANCHK) == 0)
					break;
				vm_page_test_dirty(tp);
				if ((tp->dirty & tp->valid) == 0) {
					tp->flags &= ~PG_CLEANCHK;
					break;
				}
				maf[i - 1] = tp;
				maxf++;
				continue;
			}
			break;
		}

		maxb = 0;
		chkb = vm_pageout_page_count - maxf;
		if (chkb) {
			for (i = 1; i < chkb; i++) {
				if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
					if ((tp->flags & PG_BUSY) ||
					    (tp->flags & PG_CLEANCHK) == 0)
						break;
					vm_page_test_dirty(tp);
					if ((tp->dirty & tp->valid) == 0) {
						tp->flags &= ~PG_CLEANCHK;
						break;
					}
					mab[i - 1] = tp;
					maxb++;
					continue;
				}
				break;
			}
		}

		for (i = 0; i < maxb; i++) {
			int index = (maxb - i) - 1;

			ma[index] = mab[i];
			ma[index]->flags |= PG_BUSY;
			ma[index]->flags &= ~PG_CLEANCHK;
			vm_page_protect(ma[index], VM_PROT_READ);
		}
		vm_page_protect(p, VM_PROT_READ);
		p->flags |= PG_BUSY;
		p->flags &= ~PG_CLEANCHK;
		ma[maxb] = p;
		for (i = 0; i < maxf; i++) {
			int index = (maxb + i) + 1;

			ma[index] = maf[i];
			ma[index]->flags |= PG_BUSY;
			ma[index]->flags &= ~PG_CLEANCHK;
			vm_page_protect(ma[index], VM_PROT_READ);
		}
		runlen = maxb + maxf + 1;
/*
		printf("maxb: %d, maxf: %d, runlen: %d, offset: %d\n", maxb, maxf, runlen, ma[0]->pindex);
*/
		vm_pageout_flush(ma, runlen, 0);
		goto rescan;
	}

	VOP_FSYNC(vp, NULL, syncio, curproc);

	if (lockflag)
		VOP_UNLOCK(vp);
	object->flags &= ~OBJ_CLEANING;
	return;
}

#ifdef not_used
/* XXX I cannot tell if this should be an exported symbol */
/*
 * vm_object_deactivate_pages
 *
 * Deactivate all pages in the specified object.  (Keep its pages
 * in memory even though it is no longer referenced.)
 *
 * The object must be locked.
 */
static void
vm_object_deactivate_pages(object)
	register vm_object_t object;
{
	register vm_page_t p, next;

	for (p = object->memq.tqh_first; p != NULL; p = next) {
		next = p->listq.tqe_next;
		vm_page_deactivate(p);
	}
}
#endif

/*
 * Trim the object cache to size.
 */
static void
vm_object_cache_trim()
{
	register vm_object_t object;

	while (vm_object_cached > vm_object_cache_max) {
		object = vm_object_cached_list.tqh_first;

		vm_object_reference(object);
		pager_cache(object, FALSE);
	}
}


/*
 * vm_object_pmap_copy:
 *
 * Makes all physical pages in the specified
 * object range copy-on-write.  No writeable
 * references to these pages should remain.
 *
 * The object must *not* be locked.
 */
void
vm_object_pmap_copy(object, start, end)
	register vm_object_t object;
	register vm_pindex_t start;
	register vm_pindex_t end;
{
	register vm_page_t p;

	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		vm_page_protect(p, VM_PROT_READ);
	}

	object->flags &= ~OBJ_WRITEABLE;
}

/*
 * vm_object_pmap_remove:
 *
 * Removes all physical pages in the specified
 * object range from all physical maps.
 *
 * The object must *not* be locked.
 */
void
vm_object_pmap_remove(object, start, end)
	register vm_object_t object;
	register vm_pindex_t start;
	register vm_pindex_t end;
{
	register vm_page_t p;

	if (object == NULL)
		return;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (p->pindex >= start && p->pindex < end)
			vm_page_protect(p, VM_PROT_NONE);
	}
}

/*
 * vm_object_copy:
 *
 * Create a new object which is a copy of an existing
 * object, and mark all of the pages in the existing
 * object 'copy-on-write'.  The new object has one reference.
 * Returns the new object.
 *
 * May defer the copy until later if the object is not backed
 * up by a non-default pager.
 *
 */
void
vm_object_copy(src_object, src_offset,
    dst_object, dst_offset, src_needs_copy)
	register vm_object_t src_object;
	vm_pindex_t src_offset;
	vm_object_t *dst_object;	/* OUT */
	vm_pindex_t *dst_offset;	/* OUT */
	boolean_t *src_needs_copy;	/* OUT */
{
	if (src_object == NULL) {
		/*
		 * Nothing to copy
		 */
		*dst_object = NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 * Try to collapse the object before copying it.
	 */
	if (src_object->handle == NULL &&
	    (src_object->type == OBJT_DEFAULT ||
	     src_object->type == OBJT_SWAP))
		vm_object_collapse(src_object);

	/*
	 * Make another reference to the object
	 */
	src_object->ref_count++;
	*dst_object = src_object;
	*dst_offset = src_offset;

	/*
	 * Must make a shadow when write is desired
	 */
	*src_needs_copy = TRUE;
	return;
}

/*
 * vm_object_shadow:
 *
 * Create a new object which is backed by the
 * specified existing object range.  The source
 * object reference is deallocated.
 *
 * The new object and offset into that object
 * are returned in the source parameters.
 */

void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_ooffset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	register vm_object_t source;
	register vm_object_t result;

	source = *object;

	/*
	 * Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 */
	result->backing_object = source;
	if (source) {
		TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list);
		++source->shadow_count;
	}

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */

	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */

	*offset = 0;
	*object = result;
}


/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
 */
static void
vm_object_qcollapse(object)
	register vm_object_t object;
{
	register vm_object_t backing_object;
	register vm_pindex_t backing_offset_index, paging_offset_index;
	vm_pindex_t backing_object_paging_offset_index;
	vm_pindex_t new_pindex;
	register vm_page_t p, pp;
	register vm_size_t size;

	backing_object = object->backing_object;
	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
	backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset);
	paging_offset_index = OFF_TO_IDX(object->paging_offset);
	size = object->size;
	p = backing_object->memq.tqh_first;
	while (p) {
		vm_page_t next;

		next = p->listq.tqe_next;
		if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) ||
		    (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) {
			p = next;
			continue;
		}
		vm_page_protect(p, VM_PROT_NONE);
		new_pindex = p->pindex - backing_offset_index;
		if (p->pindex < backing_offset_index ||
		    new_pindex >= size) {
			if (backing_object->type == OBJT_SWAP)
				swap_pager_freespace(backing_object,
				    backing_object_paging_offset_index + p->pindex,
				    1);
			vm_page_free(p);
		} else {
			pp = vm_page_lookup(object, new_pindex);
			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
			    paging_offset_index + new_pindex, NULL, NULL))) {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object_paging_offset_index + p->pindex, 1);
				vm_page_free(p);
			} else {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object_paging_offset_index + p->pindex, 1);
				vm_page_rename(p, object, new_pindex);
				p->dirty = VM_PAGE_BITS_ALL;
			}
		}
		p = next;
	}
	backing_object->ref_count -= 2;
}

/*
 * vm_object_collapse:
 *
 * Collapse an object with the object backing it.
 * Pages in the backing object are moved into the
 * parent, and the backing object is deallocated.
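 *
 * If either object has paging in progress, only the partial collapse
 * performed by vm_object_qcollapse() is attempted.  When the backing
 * object has additional references it cannot be destroyed, but it may
 * still be bypassed if every page it contributes is already shadowed
 * by the parent.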
 */
void
vm_object_collapse(object)
	vm_object_t object;

{
	vm_object_t backing_object;
	vm_ooffset_t backing_offset;
	vm_size_t size;
	vm_pindex_t new_pindex, backing_offset_index;
	vm_page_t p, pp;

	while (TRUE) {
		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and no pages in it are currently being paged
		 * out.
		 */
		if (object == NULL)
			return;

		/*
		 * Make sure there is a backing object.
		 */
		if ((backing_object = object->backing_object) == NULL)
			return;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			return;
		}

		if (object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0) {
			vm_object_qcollapse(object);
			return;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) remove
		 * the parent's reference to it.
		 */

		backing_offset = object->backing_object_offset;
		backing_offset_index = OFF_TO_IDX(backing_offset);
		size = object->size;

		/*
		 * If there is exactly one reference to the backing object, we
		 * can collapse it into the parent.
		 */

		if (backing_object->ref_count == 1) {

			backing_object->flags |= OBJ_DEAD;
			/*
			 * We can collapse the backing object.
			 *
			 * Move all in-memory pages from backing_object to the
			 * parent.  Pages that have been paged out will be
			 * overwritten by any of the parent's pages that
			 * shadow them.
			 */

			while ((p = backing_object->memq.tqh_first) != 0) {

				new_pindex = p->pindex - backing_offset_index;

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, dispose of
				 * it.
				 *
				 * Otherwise, move it as planned.
				 */

				if (p->pindex < backing_offset_index ||
				    new_pindex >= size) {
					vm_page_protect(p, VM_PROT_NONE);
					PAGE_WAKEUP(p);
					vm_page_free(p);
				} else {
					pp = vm_page_lookup(object, new_pindex);
					if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
					    OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) {
						vm_page_protect(p, VM_PROT_NONE);
						PAGE_WAKEUP(p);
						vm_page_free(p);
					} else {
						vm_page_rename(p, object, new_pindex);
					}
				}
			}

			/*
			 * Move the pager from backing_object to object.
			 */

			if (backing_object->type == OBJT_SWAP) {
				backing_object->paging_in_progress++;
				if (object->type == OBJT_SWAP) {
					object->paging_in_progress++;
					/*
					 * copy shadow object pages into ours
					 * and destroy unneeded pages in
					 * shadow object.
					 */
					swap_pager_copy(
					    backing_object,
					    OFF_TO_IDX(backing_object->paging_offset),
					    object,
					    OFF_TO_IDX(object->paging_offset),
					    OFF_TO_IDX(object->backing_object_offset));
					vm_object_pip_wakeup(object);
				} else {
					object->paging_in_progress++;
					/*
					 * move the shadow backing_object's pager data to
					 * "object" and convert "object" type to OBJT_SWAP.
					 */
					object->type = OBJT_SWAP;
					object->un_pager.swp.swp_nblocks =
					    backing_object->un_pager.swp.swp_nblocks;
					object->un_pager.swp.swp_allocsize =
					    backing_object->un_pager.swp.swp_allocsize;
					object->un_pager.swp.swp_blocks =
					    backing_object->un_pager.swp.swp_blocks;
					object->un_pager.swp.swp_poip =	/* XXX */
					    backing_object->un_pager.swp.swp_poip;
					object->paging_offset = backing_object->paging_offset + backing_offset;
					TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);

					/*
					 * Convert backing object from OBJT_SWAP to
					 * OBJT_DEFAULT.  XXX - only the TAILQ_REMOVE is
					 * actually necessary.
					 */
					backing_object->type = OBJT_DEFAULT;
					TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
					/*
					 * free unnecessary blocks
					 */
					swap_pager_freespace(object, 0,
					    OFF_TO_IDX(object->paging_offset));
					vm_object_pip_wakeup(object);
				}

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to backing_object->backing_object
			 * moves from within backing_object to within object.
			 */

			TAILQ_REMOVE(&object->backing_object->shadow_head, object,
			    shadow_list);
			--object->backing_object->shadow_count;
			if (backing_object->backing_object) {
				TAILQ_REMOVE(&backing_object->backing_object->shadow_head,
				    backing_object, shadow_list);
				--backing_object->backing_object->shadow_count;
			}
			object->backing_object = backing_object->backing_object;
			if (object->backing_object) {
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);
				++object->backing_object->shadow_count;
			}

			object->backing_object_offset += backing_object->backing_object_offset;
			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */

			TAILQ_REMOVE(&vm_object_list, backing_object,
			    object_list);
			vm_object_count--;

			free((caddr_t) backing_object, M_VMOBJ);

			object_collapses++;
		} else {
			/*
			 * If all of the pages in the backing object are
			 * shadowed by the parent object, the parent object no
			 * longer has to shadow the backing object; it can
			 * shadow the next one in the chain.
			 *
			 * The backing object must not be paged out - we'd have
			 * to check all of the paged-out pages, as well.
			 */

			if (backing_object->type != OBJT_DEFAULT) {
				return;
			}
			/*
			 * Should have a check for a 'small' number of pages
			 * here.
			 */

			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
				new_pindex = p->pindex - backing_offset_index;

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, keep going.
				 *
				 * Otherwise, the backing_object must be left in
				 * the chain.
				 */

				if (p->pindex >= backing_offset_index &&
				    new_pindex <= size) {

					pp = vm_page_lookup(object, new_pindex);

					if ((pp == NULL || pp->valid == 0) &&
					    !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) {
						/*
						 * Page still needed.  Can't go any
						 * further.
						 */
						return;
					}
				}
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */

			TAILQ_REMOVE(&object->backing_object->shadow_head,
			    object, shadow_list);
			--object->backing_object->shadow_count;
			vm_object_reference(object->backing_object = backing_object->backing_object);
			if (object->backing_object) {
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);
				++object->backing_object->shadow_count;
			}
			object->backing_object_offset += backing_object->backing_object_offset;

			/*
			 * Drop the reference count on backing_object.  Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate.
			 */
			if (backing_object->ref_count == 1)
				printf("should have called obj deallocate\n");
			backing_object->ref_count--;

			object_bypasses++;

		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 * vm_object_page_remove: [internal]
 *
 * Removes all physical pages in the specified
 * object range from the object's list of pages.
 *
 * The object must be locked.
 */
void
vm_object_page_remove(object, start, end, clean_only)
	register vm_object_t object;
	register vm_pindex_t start;
	register vm_pindex_t end;
	boolean_t clean_only;
{
	register vm_page_t p, next;
	unsigned int size;
	int s;

	if (object == NULL)
		return;

	object->paging_in_progress++;
again:
	size = end - start;
	if (size > 4 || size >= object->size / 4) {
		for (p = object->memq.tqh_first; p != NULL; p = next) {
			next = p->listq.tqe_next;
			if (p->wire_count != 0) {
				vm_page_protect(p, VM_PROT_NONE);
				p->valid = 0;
				continue;
			}
			if ((start <= p->pindex) && (p->pindex < end)) {
				s = splhigh();
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			if ((p = vm_page_lookup(object, start)) != 0) {
				if (p->wire_count != 0) {
					p->valid = 0;
					vm_page_protect(p, VM_PROT_NONE);
					start += 1;
					size -= 1;
					continue;
				}
				s = splhigh();
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty) {
						start += 1;
						size -= 1;
						continue;
					}
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
			start += 1;
			size -= 1;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 * Routine:	vm_object_coalesce
 * Function:	Coalesces two objects backing up adjoining
 *		regions of memory into a single object.
 *
 * returns TRUE if objects were combined.
 *
 * NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 * Parameters:
 *	prev_object	First object to coalesce
 *	prev_offset	Offset into prev_object
 *	next_object	Second object to coalesce
 *	next_offset	Offset into next_object
 *
 *	prev_size	Size of reference to prev_object
 *	next_size	Size of reference to next_object
 *
 * Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
	register vm_object_t prev_object;
	vm_pindex_t prev_pindex;
	vm_size_t prev_size, next_size;
{
	vm_size_t newsize;

	if (prev_object == NULL) {
		return (TRUE);
	}

	if (prev_object->type != OBJT_DEFAULT) {
		return (FALSE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if: . more than one reference . paged out . shadows
	 * another object . has a copy elsewhere (any of which mean that the
	 * pages not mapped to prev_entry may be in use anyway)
	 */

	if (prev_object->ref_count > 1 ||
	    prev_object->backing_object != NULL) {
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */

	vm_object_page_remove(prev_object,
	    prev_pindex + prev_size,
	    prev_pindex + prev_size + next_size, FALSE);

	/*
	 * Extend the object if necessary.
	 */
	newsize = prev_pindex + prev_size + next_size;
	if (newsize > prev_object->size)
		prev_object->size = newsize;

	return (TRUE);
}

#ifdef DDB

static int
_vm_object_in_map(map, object, entry)
	vm_map_t map;
	vm_object_t object;
	vm_map_entry_t entry;
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

static int
vm_object_in_map(object)
	vm_object_t object;
{
	struct proc *p;

	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
/*
		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
			continue;
		}
*/
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(io_map, object, 0))
		return 1;
	if (_vm_object_in_map(phys_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	if (_vm_object_in_map(u_map, object, 0))
		return 1;
	return 0;
}


#ifdef DDB
static void
DDB_vm_object_check()
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = vm_object_list.tqh_first;
	     object != NULL;
	     object = object->object_list.tqe_next) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				printf("vmochk: internal obj has zero ref count: %d\n",
				    object->size);
			}
			if (!vm_object_in_map(object)) {
				printf("vmochk: internal obj is not in a map: "
				    "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n",
				    object->ref_count, object->size,
				    object->size, object->backing_object);
			}
		}
	}
}
#endif /* DDB */

/*
 * vm_object_print:	[ debug ]
 */
void
vm_object_print(iobject, full, dummy3, dummy4)
	/* db_expr_t */ int iobject;
	boolean_t full;
	/* db_expr_t */ int dummy3;
	char *dummy4;
{
	vm_object_t object = (vm_object_t)iobject;	/* XXX */
	register vm_page_t p;

	register int count;

	if (object == NULL)
		return;

	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
	    (int) object, (int) object->size,
	    object->resident_page_count, object->ref_count);
	printf("offset=0x%x, backing_object=(0x%x)+0x%x\n",
	    (int) object->paging_offset,
	    (int) object->backing_object, (int) object->backing_object_offset);
	printf("cache: next=%p, prev=%p\n",
	    object->cached_list.tqe_next, object->cached_list.tqe_prev);

	if (!full)
		return;

	indent += 2;
	count = 0;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (count == 0)
			iprintf("memory:=");
		else if (count == 6) {
			printf("\n");
			iprintf(" ...");
			count = 0;
		} else
			printf(",");
		count++;

		printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		printf("\n");
	indent -= 2;
}
#endif /* DDB */