/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_object.c,v 1.53 1995/08/26 23:19:48 bde Exp $
 */

/*
 * Virtual memory object module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>

static void _vm_object_allocate(objtype_t, vm_size_t, vm_object_t);

/*
 * Virtual memory objects maintain the actual data
 * associated with allocated virtual memory.  A given
 * page of memory exists within exactly one object.
 *
 * An object is only deallocated when all "references"
 * are given up.  Only one "reference" to a given
 * region of an object should be writeable.
 *
 * Associated with each object is a list of all resident
 * memory pages belonging to that object; this list is
 * maintained by the "vm_page" module, and locked by the object's
 * lock.
 *
 * Each object also records a "pager" routine which is
 * used to retrieve (and store) pages to the proper backing
 * storage.  In addition, objects may be backed by other
 * objects from which they were virtual-copied.
 *
 * The only items within the object structure which are
 * modified after time of creation are:
 *	reference count		locked by object's lock
 *	pager routine		locked by object's lock
 *
 */

int vm_object_cache_max;
struct object_q vm_object_cached_list;
int vm_object_cached;
struct object_q vm_object_list;
long vm_object_count;
vm_object_t kernel_object;
vm_object_t kmem_object;
struct vm_object kernel_object_store;
struct vm_object kmem_object_store;

long object_collapses;
long object_bypasses;

static void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	register vm_object_t object;
{
	TAILQ_INIT(&object->memq);
	TAILQ_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->handle = NULL;
	object->paging_offset = 0;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_offset_t) 0;

	object->last_read = 0;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
}

/*
 * vm_object_init:
 *
 * Initialize the VM objects module.
 */
void
vm_object_init(vm_offset_t nothing)
{
	register int i;

	TAILQ_INIT(&vm_object_cached_list);
	TAILQ_INIT(&vm_object_list);
	vm_object_count = 0;

	vm_object_cache_max = 84;
	if (cnt.v_page_count > 1000)
		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    kmem_object);
}

/*
 * vm_object_allocate:
 *
 * Returns a new object with the given size.
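 * The new object carries a single reference, which the caller
 * releases with vm_object_deallocate().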
 */
vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	register vm_object_t result;

	result = (vm_object_t)
	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);

	_vm_object_allocate(type, size, result);

	return (result);
}

/*
 * vm_object_reference:
 *
 * Gets another reference to the given object.
 */
inline void
vm_object_reference(object)
	register vm_object_t object;
{
	if (object == NULL)
		return;

	if (object->ref_count == 0) {
		if ((object->flags & OBJ_CANPERSIST) == 0)
			panic("vm_object_reference: non-persistent object with 0 ref_count");
		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
		vm_object_cached--;
	}
	object->ref_count++;
}

/*
 * vm_object_deallocate:
 *
 * Release a reference to the specified object,
 * gained either through a vm_object_allocate
 * or a vm_object_reference call.  When all references
 * are gone, storage associated with this object
 * may be relinquished.
 *
 * No object may be locked.
 */
void
vm_object_deallocate(object)
	vm_object_t object;
{
	vm_object_t temp;

	while (object != NULL) {

		if (object->ref_count == 0)
			panic("vm_object_deallocate: object deallocated too many times");

		/*
		 * Lose the reference
		 */
		object->ref_count--;

		if (object->ref_count != 0) {
			if ((object->ref_count == 1) &&
			    (object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				robject = object->shadow_head.tqh_first;
				if ((robject != NULL) &&
				    (robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {
					int s;

					robject->ref_count += 2;
					object->ref_count += 2;

					do {
						s = splhigh();
						while (robject->paging_in_progress) {
							robject->flags |= OBJ_PIPWNT;
							tsleep(robject, PVM, "objde1", 0);
						}

						while (object->paging_in_progress) {
							object->flags |= OBJ_PIPWNT;
							tsleep(object, PVM, "objde2", 0);
						}
						splx(s);

					} while (object->paging_in_progress || robject->paging_in_progress);

					object->ref_count -= 2;
					robject->ref_count -= 2;
					if (robject->ref_count == 0) {
						robject->ref_count += 1;
						object = robject;
						continue;
					}
					vm_object_collapse(robject);
					return;
				}
			}
			/*
			 * If there are still references, then we are done.
			 */
			return;
		}

		if (object->type == OBJT_VNODE) {
			struct vnode *vp = object->handle;

			vp->v_flag &= ~VTEXT;
		}

		/*
		 * See if this object can persist and has some resident
		 * pages.  If so, enter it in the cache.
		 */
		if (object->flags & OBJ_CANPERSIST) {
			if (object->resident_page_count != 0) {
				vm_object_page_clean(object, 0, 0, TRUE, TRUE);
				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
				    cached_list);
				vm_object_cached++;

				vm_object_cache_trim();
				return;
			} else {
				object->flags &= ~OBJ_CANPERSIST;
			}
		}

		/*
		 * Make sure no one uses us.
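		 * Setting OBJ_DEAD marks the object as being torn down,
		 * so vm_object_collapse will refuse to operate on it
		 * while it is destroyed.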
		 */
		object->flags |= OBJ_DEAD;

		temp = object->backing_object;
		if (temp)
			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
		vm_object_terminate(object);
		/* unlocks and deallocates object */
		object = temp;
	}
}

/*
 * vm_object_terminate actually destroys the specified object, freeing
 * up all previously used resources.
 *
 * The object must be locked.
 */
void
vm_object_terminate(object)
	register vm_object_t object;
{
	register vm_page_t p, next;
	vm_object_t backing_object;
	int s;

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	s = splhigh();
	while (object->paging_in_progress) {
		object->flags |= OBJ_PIPWNT;
		tsleep(object, PVM, "objtrm", 0);
	}
	splx(s);

	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");

	/*
	 * Clean and free the pages, as appropriate. All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp = object->handle;

		VOP_LOCK(vp);
		vm_object_page_clean(object, 0, 0, TRUE, FALSE);
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
		VOP_UNLOCK(vp);
	}

	/*
	 * Now free the pages. For internal objects, this also removes them
	 * from paging queues.
	 */
	while ((p = object->memq.tqh_first) != NULL) {
		if (p->flags & PG_BUSY)
			printf("vm_object_terminate: freeing busy page\n");
		PAGE_WAKEUP(p);
		vm_page_free(p);
		cnt.v_pfree++;
	}

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	TAILQ_REMOVE(&vm_object_list, object, object_list);
	vm_object_count--;

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	free((caddr_t) object, M_VMOBJ);
}

/*
 * vm_object_page_clean
 *
 * Clean all dirty pages in the specified range of object.
 * Leaves page on whatever queue it is currently on.
 *
 * Odd semantics: if start == end, we clean everything.
 *
 * The object must be locked.
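 *
 * Implementation note: on the first pass, if the object has few resident
 * pages (or they are dense relative to its size), pages are found by
 * offset with vm_page_lookup(); otherwise the resident page list is
 * walked.  A second pass is made if any page was left dirty or busy.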
 */
void
vm_object_page_clean(object, start, end, syncio, lockflag)
	vm_object_t object;
	vm_offset_t start;
	vm_offset_t end;
	boolean_t syncio;
	boolean_t lockflag;
{
	register vm_page_t p;
	register vm_offset_t tstart, tend;
	int pass;
	int pgcount, s;
	int allclean;
	int entireobj;
	struct vnode *vp;

	if (object->type != OBJT_VNODE || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	vp = object->handle;

	if (lockflag)
		VOP_LOCK(vp);

	if (start != end) {
		start = trunc_page(start);
		end = round_page(end);
	}

	pass = 0;
startover:
	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}
	entireobj = 0;
	if (tstart == 0 && tend == object->size) {
		object->flags &= ~OBJ_WRITEABLE;
		entireobj = 1;
	}

	pgcount = object->resident_page_count;

	if (pass == 0 &&
	    (pgcount < 128 || pgcount > (object->size / (8 * PAGE_SIZE)))) {
		allclean = 1;
		for (; pgcount && (tstart < tend); tstart += PAGE_SIZE) {
			p = vm_page_lookup(object, tstart);
			if (!p)
				continue;
			--pgcount;
			s = splhigh();
			TAILQ_REMOVE(&object->memq, p, listq);
			TAILQ_INSERT_TAIL(&object->memq, p, listq);
			splx(s);
			if (entireobj)
				vm_page_protect(p, VM_PROT_READ);
			if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy ||
			    p->valid == 0) {
				continue;
			}
			vm_page_test_dirty(p);
			if ((p->valid & p->dirty) != 0) {
				vm_offset_t tincr;

				tincr = vm_pageout_clean(p, VM_PAGEOUT_FORCE);
				if (tincr) {
					pgcount -= (tincr - 1);
					tincr *= PAGE_SIZE;
					tstart += tincr - PAGE_SIZE;
				}
				allclean = 0;
			}
		}
		if (!allclean) {
			pass = 1;
			goto startover;
		}
		if (lockflag)
			VOP_UNLOCK(vp);
		return;
	}

	allclean = 1;
	while ((p = object->memq.tqh_first) != NULL && pgcount > 0) {

		if (p->flags & PG_CACHE) {
			goto donext;
		}

		if (entireobj || (p->offset >= tstart && p->offset < tend)) {
			if (entireobj)
				vm_page_protect(p, VM_PROT_READ);

			if (p->valid == 0) {
				goto donext;
			}

			s = splhigh();
			if ((p->flags & PG_BUSY) || p->busy) {
				allclean = 0;
				if (pass > 0) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "objpcn", 0);
					splx(s);
					continue;
				} else {
					splx(s);
					goto donext;
				}
			}

			TAILQ_REMOVE(&object->memq, p, listq);
			TAILQ_INSERT_TAIL(&object->memq, p, listq);
			splx(s);

			pgcount--;
			vm_page_test_dirty(p);
			if ((p->valid & p->dirty) != 0) {
				vm_pageout_clean(p, VM_PAGEOUT_FORCE);
				allclean = 0;
			}
			continue;
		}
donext:
		TAILQ_REMOVE(&object->memq, p, listq);
		TAILQ_INSERT_TAIL(&object->memq, p, listq);
		pgcount--;
	}
	if ((!allclean && (pass == 0)) ||
	    (entireobj && (object->flags & OBJ_WRITEABLE))) {
		pass = 1;
		if (entireobj)
			object->flags &= ~OBJ_WRITEABLE;
		goto startover;
	}
	if (lockflag)
		VOP_UNLOCK(vp);
	return;
}

/*
 * vm_object_deactivate_pages
 *
 * Deactivate all pages in the specified object.  (Keep its pages
 * in memory even though it is no longer referenced.)
 *
 * The object must be locked.
 */
void
vm_object_deactivate_pages(object)
	register vm_object_t object;
{
	register vm_page_t p, next;

	for (p = object->memq.tqh_first; p != NULL; p = next) {
		next = p->listq.tqe_next;
		vm_page_deactivate(p);
	}
}

/*
 * Trim the object cache to size.
 */
void
vm_object_cache_trim()
{
	register vm_object_t object;

	while (vm_object_cached > vm_object_cache_max) {
		object = vm_object_cached_list.tqh_first;

		vm_object_reference(object);
		pager_cache(object, FALSE);
	}
}

/*
 * vm_object_pmap_copy:
 *
 * Makes all physical pages in the specified
 * object range copy-on-write.  No writeable
 * references to these pages should remain.
 *
 * The object must *not* be locked.
 */
void
vm_object_pmap_copy(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;

	if (object == NULL)
		return;

	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if ((start <= p->offset) && (p->offset < end)) {
			vm_page_protect(p, VM_PROT_READ);
		}
	}
}

/*
 * vm_object_pmap_remove:
 *
 * Removes all physical pages in the specified
 * object range from all physical maps.
 *
 * The object must *not* be locked.
 */
void
vm_object_pmap_remove(object, start, end)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register vm_page_t p;
	int s;

	if (object == NULL)
		return;
	++object->paging_in_progress;

again:
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if ((start <= p->offset) && (p->offset < end)) {
			s = splhigh();
			if ((p->flags & PG_BUSY) || p->busy) {
				p->flags |= PG_WANTED;
				tsleep(p, PVM, "vmopmr", 0);
				splx(s);
				goto again;
			}
			splx(s);
			vm_page_protect(p, VM_PROT_NONE);
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 * vm_object_copy:
 *
 * Create a new object which is a copy of an existing
 * object, and mark all of the pages in the existing
 * object 'copy-on-write'.  The new object has one reference.
 * Returns the new object.
 *
 * May defer the copy until later if the object is not backed
 * up by a non-default pager.
 */
void
vm_object_copy(src_object, src_offset, size,
    dst_object, dst_offset, src_needs_copy)
	register vm_object_t src_object;
	vm_offset_t src_offset;
	vm_size_t size;
	vm_object_t *dst_object;	/* OUT */
	vm_offset_t *dst_offset;	/* OUT */
	boolean_t *src_needs_copy;	/* OUT */
{
	register vm_object_t new_copy;
	register vm_object_t old_copy;
	vm_offset_t new_start, new_end;

	register vm_page_t p;

	if (src_object == NULL) {
		/*
		 * Nothing to copy
		 */
		*dst_object = NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 * Try to collapse the object before copying it.
	 */
	if (src_object->handle == NULL &&
	    (src_object->type == OBJT_DEFAULT ||
	     src_object->type == OBJT_SWAP))
		vm_object_collapse(src_object);

	/*
	 * Make another reference to the object
	 */
	src_object->ref_count++;

	*dst_object = src_object;
	*dst_offset = src_offset;

	/*
	 * Must make a shadow when write is desired
	 */
	*src_needs_copy = TRUE;
	return;
}

/*
 * vm_object_shadow:
 *
 * Create a new object which is backed by the
 * specified existing object range.  The source
 * object reference is deallocated.
 *
 * The new object and offset into that object
 * are returned in the source parameters.
 */
void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_offset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	register vm_object_t source;
	register vm_object_t result;

	source = *object;

	/*
	 * Allocate a new object with the given length
	 */
	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 */
	result->backing_object = source;
	if (source)
		TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list);

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */
	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */
	*offset = 0;
	*object = result;
}

/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
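 *
 * It only runs when the backing object has exactly one reference; busy,
 * wired, held, fictitious and cache pages are skipped, and backing pages
 * that are already shadowed by the parent (or fall outside it) are freed
 * rather than moved.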
 */
static void
vm_object_qcollapse(object)
	register vm_object_t object;
{
	register vm_object_t backing_object;
	register vm_offset_t backing_offset, new_offset;
	register vm_page_t p, pp;
	register vm_size_t size;

	backing_object = object->backing_object;
	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	backing_offset = object->backing_object_offset;
	size = object->size;
	p = backing_object->memq.tqh_first;
	while (p) {
		vm_page_t next;

		next = p->listq.tqe_next;
		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
		    !p->valid || p->hold_count || p->wire_count || p->busy) {
			p = next;
			continue;
		}
		vm_page_protect(p, VM_PROT_NONE);
		new_offset = (p->offset - backing_offset);
		if (p->offset < backing_offset ||
		    new_offset >= size) {
			if (backing_object->type == OBJT_SWAP)
				swap_pager_freespace(backing_object,
				    backing_object->paging_offset + p->offset, PAGE_SIZE);
			vm_page_free(p);
		} else {
			pp = vm_page_lookup(object, new_offset);
			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
			    object->paging_offset + new_offset, NULL, NULL))) {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_free(p);
			} else {
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object->paging_offset + p->offset, PAGE_SIZE);
				vm_page_rename(p, object, new_offset);
				p->dirty = VM_PAGE_BITS_ALL;
			}
		}
		p = next;
	}
	backing_object->ref_count -= 2;
}

/*
 * vm_object_collapse:
 *
 * Collapse an object with the object backing it.
 * Pages in the backing object are moved into the
 * parent, and the backing object is deallocated.
 */
void
vm_object_collapse(object)
	vm_object_t object;
{
	vm_object_t backing_object;
	vm_offset_t backing_offset;
	vm_size_t size;
	vm_offset_t new_offset;
	vm_page_t p, pp;

	while (TRUE) {
		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and no pages in it are currently being paged
		 * out.
		 */
		if (object == NULL)
			return;

		/*
		 * Make sure there is a backing object.
		 */
		if ((backing_object = object->backing_object) == NULL)
			return;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			return;
		}

		if (object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0) {
			vm_object_qcollapse(object);
			return;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) remove
		 * the parent's reference to it.
		 */

		backing_offset = object->backing_object_offset;
		size = object->size;

		/*
		 * If there is exactly one reference to the backing object, we
		 * can collapse it into the parent.
		 */
		if (backing_object->ref_count == 1) {

			backing_object->flags |= OBJ_DEAD;
			/*
			 * We can collapse the backing object.
			 *
			 * Move all in-memory pages from backing_object to the
			 * parent.  Pages that have been paged out will be
			 * overwritten by any of the parent's pages that
			 * shadow them.
			 */
			while ((p = backing_object->memq.tqh_first) != 0) {

				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, dispose of
				 * it.
				 *
				 * Otherwise, move it as planned.
				 */
				if (p->offset < backing_offset ||
				    new_offset >= size) {
					vm_page_protect(p, VM_PROT_NONE);
					PAGE_WAKEUP(p);
					vm_page_free(p);
				} else {
					pp = vm_page_lookup(object, new_offset);
					if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
					    object->paging_offset + new_offset, NULL, NULL))) {
						vm_page_protect(p, VM_PROT_NONE);
						PAGE_WAKEUP(p);
						vm_page_free(p);
					} else {
						vm_page_rename(p, object, new_offset);
					}
				}
			}

			/*
			 * Move the pager from backing_object to object.
			 */
			if (backing_object->type == OBJT_SWAP) {
				backing_object->paging_in_progress++;
				if (object->type == OBJT_SWAP) {
					object->paging_in_progress++;
					/*
					 * copy shadow object pages into ours
					 * and destroy unneeded pages in
					 * shadow object.
					 */
					swap_pager_copy(
					    backing_object, backing_object->paging_offset,
					    object, object->paging_offset,
					    object->backing_object_offset);
					vm_object_pip_wakeup(object);
				} else {
					object->paging_in_progress++;
					/*
					 * move the shadow backing_object's pager data to
					 * "object" and convert "object" type to OBJT_SWAP.
					 */
					object->type = OBJT_SWAP;
					object->un_pager.swp.swp_nblocks =
					    backing_object->un_pager.swp.swp_nblocks;
					object->un_pager.swp.swp_allocsize =
					    backing_object->un_pager.swp.swp_allocsize;
					object->un_pager.swp.swp_blocks =
					    backing_object->un_pager.swp.swp_blocks;
					object->un_pager.swp.swp_poip =		/* XXX */
					    backing_object->un_pager.swp.swp_poip;
					object->paging_offset = backing_object->paging_offset + backing_offset;
					TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);

					/*
					 * Convert backing object from OBJT_SWAP to
					 * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is
					 * actually necessary.
					 */
					backing_object->type = OBJT_DEFAULT;
					TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
					/*
					 * free unnecessary blocks
					 */
					swap_pager_freespace(object, 0, object->paging_offset);
					vm_object_pip_wakeup(object);
				}

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to backing_object->backing_object
			 * moves from within backing_object to within object.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head, object,
			    shadow_list);
			if (backing_object->backing_object)
				TAILQ_REMOVE(&backing_object->backing_object->shadow_head,
				    backing_object, shadow_list);
			object->backing_object = backing_object->backing_object;
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);

			object->backing_object_offset += backing_object->backing_object_offset;
			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			TAILQ_REMOVE(&vm_object_list, backing_object,
			    object_list);
			vm_object_count--;

			free((caddr_t) backing_object, M_VMOBJ);

			object_collapses++;
		} else {
			/*
			 * If all of the pages in the backing object are
			 * shadowed by the parent object, the parent object no
			 * longer has to shadow the backing object; it can
			 * shadow the next one in the chain.
			 *
			 * The backing object must not be paged out - we'd have
			 * to check all of the paged-out pages, as well.
			 */
			if (backing_object->type != OBJT_DEFAULT) {
				return;
			}
			/*
			 * Should have a check for a 'small' number of pages
			 * here.
			 */
			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
				new_offset = (p->offset - backing_offset);

				/*
				 * If the parent has a page here, or if this
				 * page falls outside the parent, keep going.
				 *
				 * Otherwise, the backing_object must be left in
				 * the chain.
				 */
				if (p->offset >= backing_offset && new_offset <= size) {

					pp = vm_page_lookup(object, new_offset);

					if ((pp == NULL || pp->valid == 0) &&
					    !vm_pager_has_page(object, object->paging_offset + new_offset, NULL, NULL)) {

						/*
						 * Page still needed. Can't go any
						 * further.
						 */
						return;
					}
				}
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			TAILQ_REMOVE(&object->backing_object->shadow_head,
			    object, shadow_list);
			vm_object_reference(object->backing_object = backing_object->backing_object);
			if (object->backing_object)
				TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
				    object, shadow_list);
			object->backing_object_offset += backing_object->backing_object_offset;

			/*
			 * Drop the reference count on backing_object. Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate.
			 */
			if (backing_object->ref_count == 1)
				printf("should have called obj deallocate\n");
			backing_object->ref_count--;

			object_bypasses++;

		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 * vm_object_page_remove:	[internal]
 *
 * Removes all physical pages in the specified
 * object range from the object's list of pages.
 *
 * The object must be locked.
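 *
 * For large ranges the resident page list is scanned directly; for small
 * ranges each page offset is looked up individually.  Busy pages are
 * waited for, and pages with buffer mappings (bmapped != 0) are skipped.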
 */
void
vm_object_page_remove(object, start, end, clean_only)
	register vm_object_t object;
	register vm_offset_t start;
	register vm_offset_t end;
	boolean_t clean_only;
{
	register vm_page_t p, next;
	vm_offset_t size;
	int s;

	if (object == NULL)
		return;

	object->paging_in_progress++;
	start = trunc_page(start);
	end = round_page(end);
again:
	size = end - start;
	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
		for (p = object->memq.tqh_first; p != NULL; p = next) {
			next = p->listq.tqe_next;
			if ((start <= p->offset) && (p->offset < end)) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					continue;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			while ((p = vm_page_lookup(object, start)) != 0) {
				s = splhigh();
				if (p->bmapped) {
					splx(s);
					break;
				}
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
			start += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 * Routine:	vm_object_coalesce
 * Function:	Coalesces two objects backing up adjoining
 *		regions of memory into a single object.
 *
 * returns TRUE if objects were combined.
 *
 * NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 * Parameters:
 *	prev_object	First object to coalesce
 *	prev_offset	Offset into prev_object
 *	next_object	Second object to coalesce
 *	next_offset	Offset into next_object
 *
 *	prev_size	Size of reference to prev_object
 *	next_size	Size of reference to next_object
 *
 * Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, next_object,
    prev_offset, next_offset,
    prev_size, next_size)
	register vm_object_t prev_object;
	vm_object_t next_object;
	vm_offset_t prev_offset, next_offset;
	vm_size_t prev_size, next_size;
{
	vm_size_t newsize;

	if (next_object != NULL) {
		return (FALSE);
	}
	if (prev_object == NULL) {
		return (TRUE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry may be
	 * in use anyway)
	 */
	if (prev_object->ref_count > 1 ||
	    prev_object->type != OBJT_DEFAULT ||
	    prev_object->backing_object != NULL) {
		return (FALSE);
	}
	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	vm_object_page_remove(prev_object,
	    prev_offset + prev_size,
	    prev_offset + prev_size + next_size, FALSE);

	/*
	 * Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size)
		prev_object->size = newsize;

	return (TRUE);
}

/*
 * returns page after looking up in shadow chain
 */
vm_page_t
vm_object_page_lookup(object, offset)
	vm_object_t object;
	vm_offset_t offset;
{
	vm_page_t m;

	if (!(m = vm_page_lookup(object, offset))) {
		if (!object->backing_object)
			return 0;
		else
			return vm_object_page_lookup(object->backing_object, offset + object->backing_object_offset);
	}
	return m;
}

#ifdef DDB

int
_vm_object_in_map(map, object, entry)
	vm_map_t map;
	vm_object_t object;
	vm_map_entry_t entry;
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

int
vm_object_in_map(object)
	vm_object_t object;
{
	struct proc *p;

	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
/*
		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
			continue;
		}
*/
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(io_map, object, 0))
		return 1;
	if (_vm_object_in_map(phys_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	if (_vm_object_in_map(u_map, object, 0))
		return 1;
	return 0;
}

void
vm_object_check() {
	int i;
	int maxhash = 0;
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = vm_object_list.tqh_first;
	    object != NULL;
	    object = object->object_list.tqe_next) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				printf("vmochk: internal obj has zero ref count: %d\n",
				    object->size);
			}
			if (!vm_object_in_map(object)) {
				printf("vmochk: internal obj is not in a map: ref: %d, size: %d, backing_object: 0x%x\n",
				    object->ref_count, object->size, object->backing_object);
			}
		}
	}
}

/*
 * vm_object_print:	[ debug ]
 */
void
vm_object_print(iobject, full, dummy3, dummy4)
	/* db_expr_t */ int iobject;
	boolean_t full;
	/* db_expr_t */ int dummy3;
	char *dummy4;
{
	vm_object_t object = (vm_object_t) iobject;	/* XXX */
	register vm_page_t p;

	register int count;

	if (object == NULL)
		return;

	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
	    (int) object, (int) object->size,
	    object->resident_page_count, object->ref_count);
	printf("offset=0x%x, backing_object=(0x%x)+0x%x\n",
	    (int) object->paging_offset,
	    (int) object->backing_object, (int) object->backing_object_offset);
	printf("cache: next=%p, prev=%p\n",
	    object->cached_list.tqe_next, object->cached_list.tqe_prev);

	if (!full)
		return;

	indent += 2;
	count = 0;
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (count == 0)
			iprintf("memory:=");
		else if (count == 6) {
			printf("\n");
			iprintf(" ...");
			count = 0;
		} else
			printf(",");
		count++;

		printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		printf("\n");
	indent -= 2;
}
#endif /* DDB */