/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_object.c,v 1.54 1995/10/23 03:49:43 dyson Exp $
 */

/*
 *	Virtual memory object module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>

static void _vm_object_allocate(objtype_t, vm_size_t, vm_object_t);

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */

int vm_object_cache_max;
struct object_q vm_object_cached_list;
int vm_object_cached;
struct object_q vm_object_list;
long vm_object_count;
vm_object_t kernel_object;
vm_object_t kmem_object;
struct vm_object kernel_object_store;
struct vm_object kmem_object_store;
extern int vm_pageout_page_count;

long object_collapses;
long object_bypasses;

static void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	register vm_object_t object;
{
	TAILQ_INIT(&object->memq);
	TAILQ_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->handle = NULL;
	object->paging_offset = 0;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_offset_t) 0;

	object->last_read = 0;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
}

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init(vm_offset_t nothing)
{
	register int i;

	TAILQ_INIT(&vm_object_cached_list);
	TAILQ_INIT(&vm_object_list);
	vm_object_count = 0;

	vm_object_cache_max = 84;
	if (cnt.v_page_count > 1000)
		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kmem_object);
}
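
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch of the reference-counting lifecycle described in the
 * block comment above, using the allocation routines defined below.  The
 * VM_OBJECT_EXAMPLES guard macro is hypothetical; this is illustration,
 * not part of the original module.
 */
static void
vm_object_lifecycle_example(void)
{
	vm_object_t obj;

	/* A new anonymous object comes back holding a single reference. */
	obj = vm_object_allocate(OBJT_DEFAULT, 4 * PAGE_SIZE);

	/* A second consumer takes a reference of its own. */
	vm_object_reference(obj);

	/*
	 * Each reference is released independently; when the last one is
	 * given up, the object and its resident pages are reclaimed.
	 */
	vm_object_deallocate(obj);
	vm_object_deallocate(obj);
}
#endif /* VM_OBJECT_EXAMPLES */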

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */
vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	register vm_object_t result;

	result = (vm_object_t)
	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);

	_vm_object_allocate(type, size, result);

	return (result);
}

/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
inline void
vm_object_reference(object)
	register vm_object_t object;
{
	if (object == NULL)
		return;

	if (object->ref_count == 0) {
		if ((object->flags & OBJ_CANPERSIST) == 0)
			panic("vm_object_reference: non-persistent object with 0 ref_count");
		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
		vm_object_cached--;
	}
	object->ref_count++;
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
void
vm_object_deallocate(object)
	vm_object_t object;
{
	vm_object_t temp;

	while (object != NULL) {

		if (object->ref_count == 0)
			panic("vm_object_deallocate: object deallocated too many times");

		/*
		 * Lose the reference
		 */
		object->ref_count--;

		if (object->ref_count != 0) {
			if ((object->ref_count == 1) &&
			    (object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				robject = object->shadow_head.tqh_first;
				if ((robject != NULL) &&
				    (robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {
					int s;

					robject->ref_count += 2;
					object->ref_count += 2;

					do {
						s = splhigh();
						while (robject->paging_in_progress) {
							robject->flags |= OBJ_PIPWNT;
							tsleep(robject, PVM, "objde1", 0);
						}

						while (object->paging_in_progress) {
							object->flags |= OBJ_PIPWNT;
							tsleep(object, PVM, "objde2", 0);
						}
						splx(s);

					} while (object->paging_in_progress || robject->paging_in_progress);

					object->ref_count -= 2;
					robject->ref_count -= 2;
					if (robject->ref_count == 0) {
						robject->ref_count += 1;
						object = robject;
						continue;
					}
					vm_object_collapse(robject);
					return;
				}
			}
			/*
			 * If there are still references, then we are done.
			 */
			return;
		}

		if (object->type == OBJT_VNODE) {
			struct vnode *vp = object->handle;

			vp->v_flag &= ~VTEXT;
		}

		/*
		 * See if this object can persist and has some resident
		 * pages.  If so, enter it in the cache.
		 */
		if (object->flags & OBJ_CANPERSIST) {
			if (object->resident_page_count != 0) {
				vm_object_page_clean(object, 0, 0, TRUE, TRUE);
				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
				    cached_list);
				vm_object_cached++;

				vm_object_cache_trim();
				return;
			} else {
				object->flags &= ~OBJ_CANPERSIST;
			}
		}

		/*
		 * Make sure no one uses us.
		 */
		object->flags |= OBJ_DEAD;

		temp = object->backing_object;
		if (temp)
			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
		vm_object_terminate(object);
		/* unlocks and deallocates object */
		object = temp;
	}
}
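
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (the VM_OBJECT_EXAMPLES guard is hypothetical): how
 * OBJ_CANPERSIST interacts with the routine above.  A pager that wants
 * its object cached rather than destroyed sets the flag; the final
 * vm_object_deallocate() then parks the object on vm_object_cached_list,
 * and a later vm_object_reference() takes it back off that list.  This
 * only happens while the object still has resident pages; otherwise the
 * object is simply terminated.
 */
static void
vm_object_persist_example(vm_object_t object)
{
	object->flags |= OBJ_CANPERSIST;

	/* Last reference: the object is cached, not freed. */
	vm_object_deallocate(object);

	/* Reclaim it from the cache with a fresh reference. */
	vm_object_reference(object);
}
#endif /* VM_OBJECT_EXAMPLES */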

/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 */
void
vm_object_terminate(object)
	register vm_object_t object;
{
	register vm_page_t p, next;
	vm_object_t backing_object;
	int s;

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	s = splhigh();
	while (object->paging_in_progress) {
		object->flags |= OBJ_PIPWNT;
		tsleep(object, PVM, "objtrm", 0);
	}
	splx(s);

	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");

	/*
	 * Clean and free the pages, as appropriate.  All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp = object->handle;

		VOP_LOCK(vp);
		vm_object_page_clean(object, 0, 0, TRUE, FALSE);
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
		VOP_UNLOCK(vp);
	}

	/*
	 * Now free the pages.  For internal objects, this also removes them
	 * from paging queues.
	 */
	while ((p = object->memq.tqh_first) != NULL) {
		if (p->flags & PG_BUSY)
			printf("vm_object_terminate: freeing busy page\n");
		PAGE_WAKEUP(p);
		vm_page_free(p);
		cnt.v_pfree++;
	}

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	TAILQ_REMOVE(&vm_object_list, object, object_list);
	vm_object_count--;

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	free((caddr_t) object, M_VMOBJ);
}
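
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (hypothetical VM_OBJECT_EXAMPLES guard) for the routine
 * that follows: vm_object_page_clean() flushes the dirty pages of a
 * vnode-backed object, and passing start == end == 0 means "clean the
 * whole object".
 */
static void
vm_object_page_clean_example(vm_object_t object)
{
	/* Synchronously flush the first four pages, taking the vnode lock. */
	vm_object_page_clean(object, 0, 4 * PAGE_SIZE, TRUE, TRUE);

	/* start == end == 0: flush every dirty page in the object. */
	vm_object_page_clean(object, 0, 0, TRUE, TRUE);
}
#endif /* VM_OBJECT_EXAMPLES */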

/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of object.
 *	Leaves page on whatever queue it is currently on.
 *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
 */
void
vm_object_page_clean(object, start, end, syncio, lockflag)
	vm_object_t object;
	vm_offset_t start;
	vm_offset_t end;
	boolean_t syncio;
	boolean_t lockflag;
{
	register vm_page_t p;
	register vm_offset_t tstart, tend;
	int s;
	struct vnode *vp;
	int runlen;
	vm_page_t ma[vm_pageout_page_count];

	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_MIGHTBEDIRTY) == 0)
		return;

	vp = object->handle;

	if (lockflag)
		VOP_LOCK(vp);
	object->flags |= OBJ_CLEANING;

	if (start != end) {
		start = trunc_page(start);
		end = round_page(end);
	}

startover:
	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}
	if (tstart == 0 && tend == object->size) {
		object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
	}

	runlen = 0;
	for (; tstart < tend; tstart += PAGE_SIZE) {
relookup:
		p = vm_page_lookup(object, tstart);
		if (!p) {
			if (runlen > 0) {
				vm_pageout_flush(ma, runlen, syncio);
				runlen = 0;
			}
			continue;
		}
		if (p->valid == 0 || (p->flags & PG_CACHE)) {
			if (runlen > 0) {
				vm_pageout_flush(ma, runlen, syncio);
				runlen = 0;
			}
			continue;
		}

		vm_page_protect(p, VM_PROT_READ);

		s = splhigh();
		while ((p->flags & PG_BUSY) || p->busy) {
			if (runlen > 0) {
				splx(s);
				vm_pageout_flush(ma, runlen, syncio);
				runlen = 0;
				goto relookup;
			}
			p->flags |= PG_WANTED|PG_REFERENCED;
			tsleep(p, PVM, "vpcwai", 0);
			splx(s);
			goto relookup;
		}
		splx(s);

		if (p->dirty == 0)
			vm_page_test_dirty(p);

		if ((p->valid & p->dirty) != 0) {
			ma[runlen] = p;
			p->flags |= PG_BUSY;
			runlen++;
			if (runlen >= vm_pageout_page_count) {
				vm_pageout_flush(ma, runlen, syncio);
				runlen = 0;
			}
		} else if (runlen > 0) {
			vm_pageout_flush(ma, runlen, syncio);
			runlen = 0;
		}
	}
	if (runlen > 0) {
		vm_pageout_flush(ma, runlen, syncio);
	}

	VOP_FSYNC(vp, NULL, syncio, curproc);

	if (lockflag)
		VOP_UNLOCK(vp);
	object->flags &= ~OBJ_CLEANING;
	return;
}

/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
void
vm_object_deactivate_pages(object)
	register vm_object_t object;
{
	register vm_page_t p, next;

	for (p = object->memq.tqh_first; p != NULL; p = next) {
		next = p->listq.tqe_next;
		vm_page_deactivate(p);
	}
}

/*
 *	Trim the object cache to size.
 */
void
vm_object_cache_trim()
{
	register vm_object_t object;

	while (vm_object_cached > vm_object_cache_max) {
		object = vm_object_cached_list.tqh_first;

		vm_object_reference(object);
		pager_cache(object, FALSE);
	}
}
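
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (hypothetical VM_OBJECT_EXAMPLES guard) for the two
 * protection helpers that follow: one write-protects an object's resident
 * pages so that the next store faults (the copy-on-write setup), the other
 * removes them from all physical maps entirely.
 */
static void
vm_object_protect_example(vm_object_t object)
{
	/* Make every resident page read-only and clear OBJ_WRITEABLE. */
	vm_object_pmap_copy(object, (vm_offset_t) 0, object->size);

	/* Drop every resident page from every pmap that maps it. */
	vm_object_pmap_remove(object, (vm_offset_t) 0, object->size);
}
#endif /* VM_OBJECT_EXAMPLES */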

/*
 *	vm_object_pmap_copy:
 *
 *	Makes all physical pages in the specified
 *	object range copy-on-write.  No writeable
 *	references to these pages should remain.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_copy(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;

	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		vm_page_protect(p, VM_PROT_READ);
	}

	object->flags &= ~OBJ_WRITEABLE;
}

/*
 *	vm_object_pmap_remove:
 *
 *	Removes all physical pages in the specified
 *	object range from all physical maps.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_remove(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;
	int s;

	if (object == NULL)
		return;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		vm_page_protect(p, VM_PROT_NONE);
	}
}

/*
 *	vm_object_copy:
 *
 *	Create a new object which is a copy of an existing
 *	object, and mark all of the pages in the existing
 *	object 'copy-on-write'.  The new object has one reference.
 *	Returns the new object.
 *
 *	May defer the copy until later if the object is not backed
 *	up by a non-default pager.
 */
void
vm_object_copy(src_object, src_offset, size,
    dst_object, dst_offset, src_needs_copy)
	register vm_object_t src_object;
	vm_offset_t src_offset;
	vm_size_t size;
	vm_object_t *dst_object;	/* OUT */
	vm_offset_t *dst_offset;	/* OUT */
	boolean_t *src_needs_copy;	/* OUT */
{
	register vm_object_t new_copy;
	register vm_object_t old_copy;
	vm_offset_t new_start, new_end;

	register vm_page_t p;

	if (src_object == NULL) {
		/*
		 * Nothing to copy
		 */
		*dst_object = NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 * Try to collapse the object before copying it.
	 */
	if (src_object->handle == NULL &&
	    (src_object->type == OBJT_DEFAULT ||
	     src_object->type == OBJT_SWAP))
		vm_object_collapse(src_object);

	/*
	 * Make another reference to the object
	 */
	src_object->ref_count++;

	*dst_object = src_object;
	*dst_offset = src_offset;

	/*
	 * Must make a shadow when write is desired
	 */
	*src_needs_copy = TRUE;
	return;
}

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_offset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	register vm_object_t source;
	register vm_object_t result;

	source = *object;

	/*
	 * Allocate a new object with the given length
	 */
	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 */
	result->backing_object = source;
	if (source)
		TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list);

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */
	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */
	*offset = 0;
	*object = result;
}
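
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (hypothetical VM_OBJECT_EXAMPLES guard): what a caller
 * of vm_object_shadow() above ends up with.  The object pointer and offset
 * are passed by reference and replaced in place.
 */
static void
vm_object_shadow_example(vm_object_t *objectp, vm_offset_t *offsetp,
    vm_size_t length)
{
	/*
	 * Before: *objectp is the shared object, *offsetp an offset in it.
	 * After:  *objectp is a fresh OBJT_DEFAULT object of size length
	 * whose backing_object is the old one, and *offsetp is 0.  Newly
	 * copied pages will live in the new front object, while lookups
	 * that miss it fall through to the object underneath.
	 */
	vm_object_shadow(objectp, offsetp, length);
}
#endif /* VM_OBJECT_EXAMPLES */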

/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
 */
static void
vm_object_qcollapse(object)
	register vm_object_t object;
{
	register vm_object_t backing_object;
	register vm_offset_t backing_offset, new_offset;
	register vm_page_t p, pp;
	register vm_size_t size;

	backing_object = object->backing_object;
	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	backing_offset = object->backing_object_offset;
	size = object->size;
	p = backing_object->memq.tqh_first;
	while (p) {
		vm_page_t next;

		next = p->listq.tqe_next;
		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
		    !p->valid || p->hold_count || p->wire_count || p->busy) {
			p = next;
			continue;
		}
		vm_page_protect(p, VM_PROT_NONE);
		new_offset = (p->offset - backing_offset);
		if (p->offset < backing_offset ||
		    new_offset >= size) {
			if (backing_object->type == OBJT_SWAP)
				swap_pager_freespace(backing_object,
				    backing_object->paging_offset + p->offset, PAGE_SIZE);
			vm_page_free(p);
		} else {
			pp = vm_page_lookup(object, new_offset);
			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
			    object->paging_offset + new_offset, NULL, NULL))) {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_free(p);
			} else {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_rename(p, object, new_offset);
				p->dirty = VM_PAGE_BITS_ALL;
			}
		}
		p = next;
	}
	backing_object->ref_count -= 2;
}

/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 */
void
vm_object_collapse(object)
	vm_object_t object;
{
	vm_object_t backing_object;
	vm_offset_t backing_offset;
	vm_size_t size;
	vm_offset_t new_offset;
	vm_page_t p, pp;

	while (TRUE) {
		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and no pages in it are currently being paged
		 * out.
		 */
		if (object == NULL)
			return;

		/*
		 * Make sure there is a backing object.
		 */
		if ((backing_object = object->backing_object) == NULL)
			return;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
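		/*
		 * A concrete example of the offset arithmetic used by the
		 * page loops below (and in vm_object_qcollapse() above):
		 * with backing_object_offset == 2 * PAGE_SIZE and
		 * object->size == 4 * PAGE_SIZE, a backing page at offset
		 * 5 * PAGE_SIZE maps to new_offset == 3 * PAGE_SIZE and can
		 * be moved into the parent, while backing pages below
		 * 2 * PAGE_SIZE or at and above 6 * PAGE_SIZE fall outside
		 * the parent's window and are freed.
		 */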
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			return;
		}

		if (object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0) {
			vm_object_qcollapse(object);
			return;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) remove
		 * the parent's reference to it.
		 */
		backing_offset = object->backing_object_offset;
		size = object->size;

		/*
		 * If there is exactly one reference to the backing object, we
		 * can collapse it into the parent.
		 */
		if (backing_object->ref_count == 1) {

			backing_object->flags |= OBJ_DEAD;
			/*
			 * We can collapse the backing object.
			 *
			 * Move all in-memory pages from backing_object to the
			 * parent.  Pages that have been paged out will be
			 * overwritten by any of the parent's pages that
			 * shadow them.
			 */
			while ((p = backing_object->memq.tqh_first) != 0) {

				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, dispose of
				 * it.
				 *
				 * Otherwise, move it as planned.
				 */
				if (p->offset < backing_offset ||
				    new_offset >= size) {
					vm_page_protect(p, VM_PROT_NONE);
					PAGE_WAKEUP(p);
					vm_page_free(p);
				} else {
					pp = vm_page_lookup(object, new_offset);
					if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
					    object->paging_offset + new_offset, NULL, NULL))) {
						vm_page_protect(p, VM_PROT_NONE);
						PAGE_WAKEUP(p);
						vm_page_free(p);
					} else {
						vm_page_rename(p, object, new_offset);
					}
				}
			}

			/*
			 * Move the pager from backing_object to object.
			 */
			if (backing_object->type == OBJT_SWAP) {
				backing_object->paging_in_progress++;
				if (object->type == OBJT_SWAP) {
					object->paging_in_progress++;
					/*
					 * copy shadow object pages into ours
					 * and destroy unneeded pages in
					 * shadow object.
					 */
					swap_pager_copy(
					    backing_object, backing_object->paging_offset,
					    object, object->paging_offset,
					    object->backing_object_offset);
					vm_object_pip_wakeup(object);
				} else {
					object->paging_in_progress++;
					/*
					 * move the shadow backing_object's pager data to
					 * "object" and convert "object" type to OBJT_SWAP.
					 */
					object->type = OBJT_SWAP;
					object->un_pager.swp.swp_nblocks =
					    backing_object->un_pager.swp.swp_nblocks;
					object->un_pager.swp.swp_allocsize =
					    backing_object->un_pager.swp.swp_allocsize;
					object->un_pager.swp.swp_blocks =
					    backing_object->un_pager.swp.swp_blocks;
					object->un_pager.swp.swp_poip =		/* XXX */
					    backing_object->un_pager.swp.swp_poip;
					object->paging_offset = backing_object->paging_offset + backing_offset;
					TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);

					/*
					 * Convert backing object from OBJT_SWAP to
					 * OBJT_DEFAULT.  XXX - only the TAILQ_REMOVE is
					 * actually necessary.
					 */
					backing_object->type = OBJT_DEFAULT;
					TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
					/*
					 * free unnecessary blocks
					 */
					swap_pager_freespace(object, 0, object->paging_offset);
					vm_object_pip_wakeup(object);
				}

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to backing_object->backing_object
			 * moves from within backing_object to within object.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head, object,
			    shadow_list);
			if (backing_object->backing_object)
				TAILQ_REMOVE(&backing_object->backing_object->shadow_head,
				    backing_object, shadow_list);
			object->backing_object = backing_object->backing_object;
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);

			object->backing_object_offset += backing_object->backing_object_offset;
			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			TAILQ_REMOVE(&vm_object_list, backing_object,
			    object_list);
			vm_object_count--;

			free((caddr_t) backing_object, M_VMOBJ);

			object_collapses++;
		} else {
			/*
			 * If all of the pages in the backing object are
			 * shadowed by the parent object, the parent object no
			 * longer has to shadow the backing object; it can
			 * shadow the next one in the chain.
			 *
			 * The backing object must not be paged out - we'd have
			 * to check all of the paged-out pages, as well.
			 */
			if (backing_object->type != OBJT_DEFAULT) {
				return;
			}
			/*
			 * Should have a check for a 'small' number of pages
			 * here.
			 */
			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, keep going.
				 *
				 * Otherwise, the backing_object must be left in
				 * the chain.
				 */
				if (p->offset >= backing_offset && new_offset <= size) {

					pp = vm_page_lookup(object, new_offset);

					if ((pp == NULL || pp->valid == 0) &&
					    !vm_pager_has_page(object, object->paging_offset + new_offset, NULL, NULL)) {

						/*
						 * Page still needed.  Can't go any
						 * further.
						 */
						return;
					}
				}
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head,
			    object, shadow_list);
			vm_object_reference(object->backing_object = backing_object->backing_object);
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);
			object->backing_object_offset += backing_object->backing_object_offset;

			/*
			 * Drop the reference count on backing_object.  Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate.
			 */
			if (backing_object->ref_count == 1)
				printf("should have called obj deallocate\n");
			backing_object->ref_count--;

			object_bypasses++;

		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 *	vm_object_page_remove: [internal]
 *
 *	Removes all physical pages in the specified
 *	object range from the object's list of pages.
 *
 *	The object must be locked.
 */
void
vm_object_page_remove(object, start, end, clean_only)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
	boolean_t clean_only;
{
	register vm_page_t p, next;
	vm_offset_t size;
	int s;

	if (object == NULL)
		return;

	object->paging_in_progress++;
	start = trunc_page(start);
	end = round_page(end);
again:
	size = end - start;
	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
		for (p = object->memq.tqh_first; p != NULL; p = next) {
			next = p->listq.tqe_next;
			if ((start <= p->offset) && (p->offset < end)) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					continue;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			while ((p = vm_page_lookup(object, start)) != 0) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					break;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						break;	/* dirty page stays; move on to the next offset */
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
			start += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, next_object,
    prev_offset, next_offset,
    prev_size, next_size)
	register vm_object_t prev_object;
	vm_object_t next_object;
	vm_offset_t prev_offset, next_offset;
	vm_size_t prev_size, next_size;
{
	vm_size_t newsize;

	if (next_object != NULL) {
		return (FALSE);
	}
	if (prev_object == NULL) {
		return (TRUE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 * . more than one reference
	 * . paged out
	 * . shadows another object
	 * . has a copy elsewhere
	 * (any of which mean that the pages not mapped to
	 * prev_entry may be in use anyway)
	 */
	if (prev_object->ref_count > 1 ||
	    prev_object->type != OBJT_DEFAULT ||
	    prev_object->backing_object != NULL) {
		return (FALSE);
	}
	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	vm_object_page_remove(prev_object,
	    prev_offset + prev_size,
	    prev_offset + prev_size + next_size, FALSE);

	/*
	 * Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size)
		prev_object->size = newsize;

	return (TRUE);
}
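
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (hypothetical VM_OBJECT_EXAMPLES guard): how a map-level
 * caller is meant to use vm_object_coalesce() above when a new region of
 * next_size bytes lands directly after an existing one.  On a TRUE return
 * the previous object has grown, and the new range simply lives at
 * prev_offset + prev_size inside it; otherwise the new range needs an
 * object of its own.
 */
static vm_object_t
vm_object_coalesce_example(vm_object_t prev_object, vm_offset_t prev_offset,
    vm_size_t prev_size, vm_size_t next_size)
{
	if (prev_object != NULL &&
	    vm_object_coalesce(prev_object, NULL, prev_offset, (vm_offset_t) 0,
		prev_size, next_size))
		return (prev_object);

	return (vm_object_allocate(OBJT_DEFAULT, next_size));
}
#endif /* VM_OBJECT_EXAMPLES */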

/*
 * returns page after looking up in shadow chain
 */
vm_page_t
vm_object_page_lookup(object, offset)
	vm_object_t object;
	vm_offset_t offset;
{
	vm_page_t m;

	if (!(m = vm_page_lookup(object, offset))) {
		if (!object->backing_object)
			return 0;
		else
			return vm_object_page_lookup(object->backing_object, offset + object->backing_object_offset);
	}
	return m;
}

#ifdef DDB

int
_vm_object_in_map(map, object, entry)
	vm_map_t map;
	vm_object_t object;
	vm_map_entry_t entry;
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

int
vm_object_in_map(object)
	vm_object_t object;
{
	struct proc *p;

	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
/*
		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
			continue;
		}
*/
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(io_map, object, 0))
		return 1;
	if (_vm_object_in_map(phys_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	if (_vm_object_in_map(u_map, object, 0))
		return 1;
	return 0;
}

void
vm_object_check()
{
	int i;
	int maxhash = 0;
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = vm_object_list.tqh_first;
	    object != NULL;
	    object = object->object_list.tqe_next) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				printf("vmochk: internal obj has zero ref count: %d\n",
				    object->size);
			}
			if (!vm_object_in_map(object)) {
				printf("vmochk: internal obj is not in a map: ref: %d, size: %d: 0x%x, backing_object: 0x%x\n",
				    object->ref_count, object->size, object, object->backing_object);
			}
		}
	}
}

/*
 *	vm_object_print:	[ debug ]
 */
void
vm_object_print(iobject, full, dummy3, dummy4)
	/* db_expr_t */ int iobject;
	boolean_t full;
	/* db_expr_t */ int dummy3;
	char *dummy4;
{
	vm_object_t object = (vm_object_t)iobject;	/* XXX */
	register vm_page_t p;

	register int count;

	if (object == NULL)
		return;

	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
	    (int) object, (int) object->size,
	    object->resident_page_count, object->ref_count);
	printf("offset=0x%x, backing_object=(0x%x)+0x%x\n",
	    (int) object->paging_offset,
	    (int) object->backing_object, (int) object->backing_object_offset);
	printf("cache: next=%p, prev=%p\n",
	    object->cached_list.tqe_next, object->cached_list.tqe_prev);

	if (!full)
		return;

	indent += 2;
	count = 0;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (count == 0)
			iprintf("memory:=");
		else if (count == 6) {
			printf("\n");
			iprintf(" ...");
			count = 0;
		} else
			printf(",");
		count++;

		printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		printf("\n");
	indent -= 2;
}
#endif /* DDB */
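
#ifdef VM_OBJECT_EXAMPLES
/*
 * Editorial sketch (hypothetical VM_OBJECT_EXAMPLES guard): the offset
 * translation performed by vm_object_page_lookup() earlier in this file,
 * written out for a single hop of the shadow chain.  The real routine
 * repeats this step recursively until it finds a page or runs out of
 * backing objects.
 */
static vm_page_t
vm_object_page_lookup_example(vm_object_t object, vm_offset_t offset)
{
	vm_page_t m;

	m = vm_page_lookup(object, offset);
	if (m == NULL && object->backing_object != NULL)
		m = vm_page_lookup(object->backing_object,
		    offset + object->backing_object_offset);
	return (m);
}
#endif /* VM_OBJECT_EXAMPLES */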