/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_object.c,v 1.52 1995/08/16 16:14:28 bde Exp $
 */

/*
 *	Virtual memory object module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>

static void _vm_object_allocate(objtype_t, vm_size_t, vm_object_t);

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */

int vm_object_cache_max;
struct object_q vm_object_cached_list;
int vm_object_cached;
struct object_q vm_object_list;
long vm_object_count;
vm_object_t kernel_object;
vm_object_t kmem_object;
struct vm_object kernel_object_store;
struct vm_object kmem_object_store;

long object_collapses;
long object_bypasses;

static void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	register vm_object_t object;
{
	TAILQ_INIT(&object->memq);
	TAILQ_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->handle = NULL;
	object->paging_offset = 0;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_offset_t) 0;

	object->last_read = 0;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
}

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init(vm_offset_t nothing)
{
	register int i;

	TAILQ_INIT(&vm_object_cached_list);
	TAILQ_INIT(&vm_object_list);
	vm_object_count = 0;

	vm_object_cache_max = 84;
	if (cnt.v_page_count > 1000)
		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kmem_object);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
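 *
 *	The object is allocated with malloc(M_WAITOK) and is returned
 *	holding a single reference.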
 */
vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	register vm_object_t result;

	result = (vm_object_t)
	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);

	_vm_object_allocate(type, size, result);

	return (result);
}


/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
inline void
vm_object_reference(object)
	register vm_object_t object;
{
	if (object == NULL)
		return;

	if (object->ref_count == 0) {
		if ((object->flags & OBJ_CANPERSIST) == 0)
			panic("vm_object_reference: non-persistent object with 0 ref_count");
		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
		vm_object_cached--;
	}
	object->ref_count++;
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
void
vm_object_deallocate(object)
	vm_object_t object;
{
	vm_object_t temp;

	while (object != NULL) {

		if (object->ref_count == 0)
			panic("vm_object_deallocate: object deallocated too many times");

		/*
		 * Lose the reference
		 */
		object->ref_count--;

		if (object->ref_count != 0) {
			if ((object->ref_count == 1) &&
			    (object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				robject = object->shadow_head.tqh_first;
				if ((robject != NULL) &&
				    (robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {
					int s;

					/*
					 * Bump both reference counts so that
					 * neither object can be freed while we
					 * wait for pending paging activity to
					 * drain, then collapse this object into
					 * its only shadow.
					 */
					robject->ref_count += 2;
					object->ref_count += 2;

					do {
						s = splhigh();
						while (robject->paging_in_progress) {
							robject->flags |= OBJ_PIPWNT;
							tsleep(robject, PVM, "objde1", 0);
						}

						while (object->paging_in_progress) {
							object->flags |= OBJ_PIPWNT;
							tsleep(object, PVM, "objde2", 0);
						}
						splx(s);

					} while (object->paging_in_progress || robject->paging_in_progress);

					object->ref_count -= 2;
					robject->ref_count -= 2;
					if (robject->ref_count == 0) {
						robject->ref_count += 1;
						object = robject;
						continue;
					}
					vm_object_collapse(robject);
					return;
				}
			}
			/*
			 * If there are still references, then we are done.
			 */
			return;
		}

		if (object->type == OBJT_VNODE) {
			struct vnode *vp = object->handle;

			vp->v_flag &= ~VTEXT;
		}

		/*
		 * See if this object can persist and has some resident
		 * pages.  If so, enter it in the cache.
		 */
		if (object->flags & OBJ_CANPERSIST) {
			if (object->resident_page_count != 0) {
				vm_object_page_clean(object, 0, 0, TRUE, TRUE);
				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
				    cached_list);
				vm_object_cached++;

				vm_object_cache_trim();
				return;
			} else {
				object->flags &= ~OBJ_CANPERSIST;
			}
		}

		/*
		 * Make sure no one uses us.
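		 * Setting OBJ_DEAD keeps vm_object_collapse() from picking
		 * this object up again while it is being terminated.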
		 */
		object->flags |= OBJ_DEAD;

		temp = object->backing_object;
		if (temp)
			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
		vm_object_terminate(object);
		/* unlocks and deallocates object */
		object = temp;
	}
}

/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 */
void
vm_object_terminate(object)
	register vm_object_t object;
{
	register vm_page_t p, next;
	vm_object_t backing_object;
	int s;

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	s = splhigh();
	while (object->paging_in_progress) {
		object->flags |= OBJ_PIPWNT;
		tsleep(object, PVM, "objtrm", 0);
	}
	splx(s);

	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");

	/*
	 * Clean and free the pages, as appropriate.  All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp = object->handle;

		VOP_LOCK(vp);
		vm_object_page_clean(object, 0, 0, TRUE, FALSE);
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
		VOP_UNLOCK(vp);
	}

	/*
	 * Now free the pages.  For internal objects, this also removes them
	 * from paging queues.
	 */
	while ((p = object->memq.tqh_first) != NULL) {
		if (p->flags & PG_BUSY)
			printf("vm_object_terminate: freeing busy page\n");
		PAGE_WAKEUP(p);
		vm_page_free(p);
		cnt.v_pfree++;
	}

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	TAILQ_REMOVE(&vm_object_list, object, object_list);
	vm_object_count--;

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	free((caddr_t) object, M_VMOBJ);
}

/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of object.
 *	Leaves page on whatever queue it is currently on.
 *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
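 *
 *	Only writeable, vnode-backed objects are cleaned; if "lockflag" is
 *	TRUE the underlying vnode is locked for the duration of the call.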
 */
void
vm_object_page_clean(object, start, end, syncio, lockflag)
	vm_object_t object;
	vm_offset_t start;
	vm_offset_t end;
	boolean_t syncio;
	boolean_t lockflag;
{
	register vm_page_t p;
	register vm_offset_t tstart, tend;
	int pass;
	int pgcount, s;
	int allclean;
	int entireobj;
	struct vnode *vp;

	if (object->type != OBJT_VNODE || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	vp = object->handle;

	if (lockflag)
		VOP_LOCK(vp);

	if (start != end) {
		start = trunc_page(start);
		end = round_page(end);
	}

	pass = 0;
startover:
	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}
	entireobj = 0;
	if (tstart == 0 && tend == object->size) {
		object->flags &= ~OBJ_WRITEABLE;
		entireobj = 1;
	}

	pgcount = object->resident_page_count;

	if (pass == 0 &&
	    (pgcount < 128 || pgcount > (object->size / (8 * PAGE_SIZE)))) {
		allclean = 1;
		for (; pgcount && (tstart < tend); tstart += PAGE_SIZE) {
			p = vm_page_lookup(object, tstart);
			if (!p)
				continue;
			--pgcount;
			s = splhigh();
			TAILQ_REMOVE(&object->memq, p, listq);
			TAILQ_INSERT_TAIL(&object->memq, p, listq);
			splx(s);
			if (entireobj)
				vm_page_protect(p, VM_PROT_READ);
			if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy ||
			    p->valid == 0) {
				continue;
			}
			vm_page_test_dirty(p);
			if ((p->valid & p->dirty) != 0) {
				vm_offset_t tincr;

				tincr = vm_pageout_clean(p, VM_PAGEOUT_FORCE);
				if (tincr) {
					pgcount -= (tincr - 1);
					tincr *= PAGE_SIZE;
					tstart += tincr - PAGE_SIZE;
				}
				allclean = 0;
			}
		}
		if (!allclean) {
			pass = 1;
			goto startover;
		}
		if (lockflag)
			VOP_UNLOCK(vp);
		return;
	}

	allclean = 1;
	while ((p = object->memq.tqh_first) != NULL && pgcount > 0) {

		if (p->flags & PG_CACHE) {
			goto donext;
		}

		if (entireobj || (p->offset >= tstart && p->offset < tend)) {
			if (entireobj)
				vm_page_protect(p, VM_PROT_READ);

			if (p->valid == 0) {
				goto donext;
			}

			s = splhigh();
			if ((p->flags & PG_BUSY) || p->busy) {
				allclean = 0;
				if (pass > 0) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "objpcn", 0);
					splx(s);
					continue;
				} else {
					splx(s);
					goto donext;
				}
			}

			TAILQ_REMOVE(&object->memq, p, listq);
			TAILQ_INSERT_TAIL(&object->memq, p, listq);
			splx(s);

			pgcount--;
			vm_page_test_dirty(p);
			if ((p->valid & p->dirty) != 0) {
				vm_pageout_clean(p, VM_PAGEOUT_FORCE);
				allclean = 0;
			}
			continue;
		}
donext:
		TAILQ_REMOVE(&object->memq, p, listq);
		TAILQ_INSERT_TAIL(&object->memq, p, listq);
		pgcount--;
	}
	if ((!allclean && (pass == 0)) ||
	    (entireobj && (object->flags & OBJ_WRITEABLE))) {
		pass = 1;
		if (entireobj)
			object->flags &= ~OBJ_WRITEABLE;
		goto startover;
	}
	if (lockflag)
		VOP_UNLOCK(vp);
	return;
}

/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
void
vm_object_deactivate_pages(object)
	register vm_object_t object;
{
	register vm_page_t p, next;

	for (p = object->memq.tqh_first; p != NULL; p = next) {
		next = p->listq.tqe_next;
		vm_page_deactivate(p);
	}
}

/*
 *	Trim the object cache to size.
 */
void
vm_object_cache_trim()
{
	register vm_object_t object;

	while (vm_object_cached > vm_object_cache_max) {
		object = vm_object_cached_list.tqh_first;

		vm_object_reference(object);
		pager_cache(object, FALSE);
	}
}


/*
 *	vm_object_pmap_copy:
 *
 *	Makes all physical pages in the specified
 *	object range copy-on-write.  No writeable
 *	references to these pages should remain.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_copy(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;

	if (object == NULL)
		return;

	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if ((start <= p->offset) && (p->offset < end)) {
			vm_page_protect(p, VM_PROT_READ);
			p->flags |= PG_COPYONWRITE;
		}
	}
}

/*
 *	vm_object_pmap_remove:
 *
 *	Removes all physical pages in the specified
 *	object range from all physical maps.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_remove(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;
	int s;

	if (object == NULL)
		return;
	++object->paging_in_progress;

again:
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if ((start <= p->offset) && (p->offset < end)) {
			s = splhigh();
			if ((p->flags & PG_BUSY) || p->busy) {
				p->flags |= PG_WANTED;
				tsleep(p, PVM, "vmopmr", 0);
				splx(s);
				goto again;
			}
			splx(s);
			vm_page_protect(p, VM_PROT_NONE);
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 *	vm_object_copy:
 *
 *	Create a new object which is a copy of an existing
 *	object, and mark all of the pages in the existing
 *	object 'copy-on-write'.  The new object has one reference.
 *	Returns the new object.
 *
 *	May defer the copy until later if the object is not backed
 *	up by a non-default pager.
 */
void
vm_object_copy(src_object, src_offset, size,
    dst_object, dst_offset, src_needs_copy)
	register vm_object_t src_object;
	vm_offset_t src_offset;
	vm_size_t size;
	vm_object_t *dst_object;	/* OUT */
	vm_offset_t *dst_offset;	/* OUT */
	boolean_t *src_needs_copy;	/* OUT */
{
	register vm_object_t new_copy;
	register vm_object_t old_copy;
	vm_offset_t new_start, new_end;

	register vm_page_t p;

	if (src_object == NULL) {
		/*
		 * Nothing to copy
		 */
		*dst_object = NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 * Try to collapse the object before copying it.
	 */
	if (src_object->handle == NULL &&
	    (src_object->type == OBJT_DEFAULT ||
	     src_object->type == OBJT_SWAP))
		vm_object_collapse(src_object);

	/*
	 * Make another reference to the object
	 */
	src_object->ref_count++;

	/*
	 * Mark all of the pages copy-on-write.
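	 * The actual copy is deferred: the caller gets the source object
	 * back with *src_needs_copy set, and must create a shadow object
	 * before writing to it.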
	 */
	for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
		if (src_offset <= p->offset &&
		    p->offset < src_offset + size)
			p->flags |= PG_COPYONWRITE;

	*dst_object = src_object;
	*dst_offset = src_offset;

	/*
	 * Must make a shadow when write is desired
	 */
	*src_needs_copy = TRUE;
	return;
}

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_offset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	register vm_object_t source;
	register vm_object_t result;

	source = *object;

	/*
	 * Allocate a new object with the given length
	 */
	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 */
	result->backing_object = source;
	if (source)
		TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list);

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */
	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */
	*offset = 0;
	*object = result;
}


/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
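 *
 * Only resident pages that can be handled without blocking (not busy,
 * held, wired, or cached) are moved into the parent or freed; everything
 * else is left for a later full collapse.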
 */
static void
vm_object_qcollapse(object)
	register vm_object_t object;
{
	register vm_object_t backing_object;
	register vm_offset_t backing_offset, new_offset;
	register vm_page_t p, pp;
	register vm_size_t size;

	backing_object = object->backing_object;
	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	backing_offset = object->backing_object_offset;
	size = object->size;
	p = backing_object->memq.tqh_first;
	while (p) {
		vm_page_t next;

		next = p->listq.tqe_next;
		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
		    !p->valid || p->hold_count || p->wire_count || p->busy) {
			p = next;
			continue;
		}
		vm_page_protect(p, VM_PROT_NONE);
		new_offset = (p->offset - backing_offset);
		if (p->offset < backing_offset ||
		    new_offset >= size) {
			if (backing_object->type == OBJT_SWAP)
				swap_pager_freespace(backing_object,
				    backing_object->paging_offset + p->offset, PAGE_SIZE);
			vm_page_free(p);
		} else {
			pp = vm_page_lookup(object, new_offset);
			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
			    object->paging_offset + new_offset, NULL, NULL))) {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_free(p);
			} else {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_rename(p, object, new_offset);
				p->dirty = VM_PAGE_BITS_ALL;
			}
		}
		p = next;
	}
	backing_object->ref_count -= 2;
}

/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 */
void
vm_object_collapse(object)
	vm_object_t object;
{
	vm_object_t backing_object;
	vm_offset_t backing_offset;
	vm_size_t size;
	vm_offset_t new_offset;
	vm_page_t p, pp;

	while (TRUE) {
		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and no pages in it are currently being paged
		 * out.
		 */
		if (object == NULL)
			return;

		/*
		 * Make sure there is a backing object.
		 */
		if ((backing_object = object->backing_object) == NULL)
			return;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			return;
		}

		if (object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0) {
			vm_object_qcollapse(object);
			return;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) remove
		 * the parent's reference to it.
		 */
		backing_offset = object->backing_object_offset;
		size = object->size;

		/*
		 * If there is exactly one reference to the backing object, we
		 * can collapse it into the parent.
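		 * This moves its resident pages (and, for swap-backed objects,
		 * its pager data) into the parent, splices the parent onto the
		 * backing object's own backing object, and frees the backing
		 * object.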
		 */
		if (backing_object->ref_count == 1) {

			backing_object->flags |= OBJ_DEAD;
			/*
			 * We can collapse the backing object.
			 *
			 * Move all in-memory pages from backing_object to the
			 * parent.  Pages that have been paged out will be
			 * overwritten by any of the parent's pages that
			 * shadow them.
			 */
			while ((p = backing_object->memq.tqh_first) != 0) {

				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, dispose of
				 * it.
				 *
				 * Otherwise, move it as planned.
				 */
				if (p->offset < backing_offset ||
				    new_offset >= size) {
					vm_page_protect(p, VM_PROT_NONE);
					PAGE_WAKEUP(p);
					vm_page_free(p);
				} else {
					pp = vm_page_lookup(object, new_offset);
					if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
					    object->paging_offset + new_offset, NULL, NULL))) {
						vm_page_protect(p, VM_PROT_NONE);
						PAGE_WAKEUP(p);
						vm_page_free(p);
					} else {
						vm_page_rename(p, object, new_offset);
					}
				}
			}

			/*
			 * Move the pager from backing_object to object.
			 */
			if (backing_object->type == OBJT_SWAP) {
				backing_object->paging_in_progress++;
				if (object->type == OBJT_SWAP) {
					object->paging_in_progress++;
					/*
					 * copy shadow object pages into ours
					 * and destroy unneeded pages in
					 * shadow object.
					 */
					swap_pager_copy(
					    backing_object, backing_object->paging_offset,
					    object, object->paging_offset,
					    object->backing_object_offset);
					vm_object_pip_wakeup(object);
				} else {
					object->paging_in_progress++;
					/*
					 * move the shadow backing_object's pager data to
					 * "object" and convert "object" type to OBJT_SWAP.
					 */
					object->type = OBJT_SWAP;
					object->un_pager.swp.swp_nblocks =
					    backing_object->un_pager.swp.swp_nblocks;
					object->un_pager.swp.swp_allocsize =
					    backing_object->un_pager.swp.swp_allocsize;
					object->un_pager.swp.swp_blocks =
					    backing_object->un_pager.swp.swp_blocks;
					object->un_pager.swp.swp_poip =		/* XXX */
					    backing_object->un_pager.swp.swp_poip;
					object->paging_offset = backing_object->paging_offset + backing_offset;
					TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);

					/*
					 * Convert backing object from OBJT_SWAP to
					 * OBJT_DEFAULT.  XXX - only the TAILQ_REMOVE is
					 * actually necessary.
					 */
					backing_object->type = OBJT_DEFAULT;
					TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
					/*
					 * free unnecessary blocks
					 */
					swap_pager_freespace(object, 0, object->paging_offset);
					vm_object_pip_wakeup(object);
				}

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to backing_object->backing_object
			 * moves from within backing_object to within object.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head, object,
			    shadow_list);
			if (backing_object->backing_object)
				TAILQ_REMOVE(&backing_object->backing_object->shadow_head,
				    backing_object, shadow_list);
			object->backing_object = backing_object->backing_object;
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);

			object->backing_object_offset += backing_object->backing_object_offset;
			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			TAILQ_REMOVE(&vm_object_list, backing_object,
			    object_list);
			vm_object_count--;

			free((caddr_t) backing_object, M_VMOBJ);

			object_collapses++;
		} else {
			/*
			 * If all of the pages in the backing object are
			 * shadowed by the parent object, the parent object no
			 * longer has to shadow the backing object; it can
			 * shadow the next one in the chain.
			 *
			 * The backing object must not be paged out - we'd have
			 * to check all of the paged-out pages, as well.
			 */
			if (backing_object->type != OBJT_DEFAULT) {
				return;
			}
			/*
			 * Should have a check for a 'small' number of pages
			 * here.
			 */
			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, keep going.
				 *
				 * Otherwise, the backing_object must be left in
				 * the chain.
				 */
				if (p->offset >= backing_offset && new_offset <= size) {

					pp = vm_page_lookup(object, new_offset);

					if ((pp == NULL || pp->valid == 0) &&
					    !vm_pager_has_page(object, object->paging_offset + new_offset, NULL, NULL)) {

						/*
						 * Page still needed.  Can't go any
						 * further.
						 */
						return;
					}
				}
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head,
			    object, shadow_list);
			vm_object_reference(object->backing_object = backing_object->backing_object);
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);
			object->backing_object_offset += backing_object->backing_object_offset;

			/*
			 * Drop the reference count on backing_object.  Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate.
			 */
			if (backing_object->ref_count == 1)
				printf("should have called obj deallocate\n");
			backing_object->ref_count--;

			object_bypasses++;

		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 *	vm_object_page_remove:		[internal]
 *
 *	Removes all physical pages in the specified
 *	object range from the object's list of pages.
 *
 *	The object must be locked.
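 *
 *	Busy pages are waited for, pages with "bmapped" set are skipped,
 *	and if "clean_only" is TRUE dirty pages are left in place.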
 */
void
vm_object_page_remove(object, start, end, clean_only)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
	boolean_t clean_only;
{
	register vm_page_t p, next;
	vm_offset_t size;
	int s;

	if (object == NULL)
		return;

	object->paging_in_progress++;
	start = trunc_page(start);
	end = round_page(end);
again:
	size = end - start;
	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
		for (p = object->memq.tqh_first; p != NULL; p = next) {
			next = p->listq.tqe_next;
			if ((start <= p->offset) && (p->offset < end)) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					continue;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			while ((p = vm_page_lookup(object, start)) != 0) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					break;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
			start += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, next_object,
    prev_offset, next_offset,
    prev_size, next_size)
	register vm_object_t prev_object;
	vm_object_t next_object;
	vm_offset_t prev_offset, next_offset;
	vm_size_t prev_size, next_size;
{
	vm_size_t newsize;

	if (next_object != NULL) {
		return (FALSE);
	}
	if (prev_object == NULL) {
		return (TRUE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry may be
	 * in use anyway)
	 */
	if (prev_object->ref_count > 1 ||
	    prev_object->type != OBJT_DEFAULT ||
	    prev_object->backing_object != NULL) {
		return (FALSE);
	}
	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	vm_object_page_remove(prev_object,
	    prev_offset + prev_size,
	    prev_offset + prev_size + next_size, FALSE);

	/*
	 * Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size)
		prev_object->size = newsize;

	return (TRUE);
}

/*
 * returns page after looking up in shadow chain
 */
vm_page_t
vm_object_page_lookup(object, offset)
	vm_object_t object;
	vm_offset_t offset;
{
	vm_page_t m;

	if (!(m = vm_page_lookup(object, offset))) {
		if (!object->backing_object)
			return 0;
		else
			return vm_object_page_lookup(object->backing_object, offset + object->backing_object_offset);
	}
	return m;
}

#ifdef DDB

int
_vm_object_in_map(map, object, entry)
	vm_map_t map;
	vm_object_t object;
	vm_map_entry_t entry;
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

int
vm_object_in_map(object)
	vm_object_t object;
{
	struct proc *p;

	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
/*
		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
			continue;
		}
*/
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(io_map, object, 0))
		return 1;
	if (_vm_object_in_map(phys_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	if (_vm_object_in_map(u_map, object, 0))
		return 1;
	return 0;
}


void
vm_object_check()
{
	int i;
	int maxhash = 0;
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = vm_object_list.tqh_first;
	    object != NULL;
	    object = object->object_list.tqe_next) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				printf("vmochk: internal obj has zero ref count: %d\n",
				    object->size);
			}
			if (!vm_object_in_map(object)) {
				printf("vmochk: internal obj is not in a map: ref: %d, size: %d, backing_object: 0x%x\n",
				    object->ref_count, object->size, object->backing_object);
			}
		}
	}
}

/*
 *	vm_object_print:	[ debug ]
 */
void
vm_object_print(iobject, full, dummy3, dummy4)
	/* db_expr_t */ int iobject;
	boolean_t full;
	/* db_expr_t */ int dummy3;
	char *dummy4;
{
	vm_object_t object = (vm_object_t)iobject;	/* XXX */
	register vm_page_t p;

	register int count;

	if (object == NULL)
		return;

	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
	    (int) object, (int) object->size,
	    object->resident_page_count, object->ref_count);
	printf("offset=0x%x, backing_object=(0x%x)+0x%x\n",
	    (int) object->paging_offset,
	    (int) object->backing_object, (int) object->backing_object_offset);
	printf("cache: next=%p, prev=%p\n",
	    object->cached_list.tqe_next, object->cached_list.tqe_prev);

	if (!full)
		return;

	indent += 2;
	count = 0;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (count == 0)
			iprintf("memory:=");
		else if (count == 6) {
			printf("\n");
			iprintf(" ...");
			count = 0;
		} else
			printf(",");
		count++;

		printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		printf("\n");
	indent -= 2;
}
#endif /* DDB */