1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $Id: vm_object.c,v 1.62 1996/01/04 21:13:20 wollman Exp $ 65 */ 66 67 /* 68 * Virtual memory object module. 
69 */ 70 #include "opt_ddb.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/kernel.h> 75 #include <sys/proc.h> /* for curproc, pageproc */ 76 #include <sys/malloc.h> 77 #include <sys/vnode.h> 78 #include <sys/mount.h> 79 #include <sys/vmmeter.h> 80 81 #include <vm/vm.h> 82 #include <vm/vm_param.h> 83 #include <vm/vm_prot.h> 84 #include <vm/lock.h> 85 #include <vm/pmap.h> 86 #include <vm/vm_map.h> 87 #include <vm/vm_object.h> 88 #include <vm/vm_page.h> 89 #include <vm/vm_pageout.h> 90 #include <vm/vm_pager.h> 91 #include <vm/swap_pager.h> 92 #include <vm/vm_kern.h> 93 #include <vm/vm_extern.h> 94 95 #ifdef DDB 96 static void DDB_vm_object_check __P((void)); 97 #endif 98 99 static void _vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t)); 100 #ifdef DDB 101 static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, 102 vm_map_entry_t entry)); 103 static int vm_object_in_map __P((vm_object_t object)); 104 #endif 105 static void vm_object_qcollapse __P((vm_object_t object)); 106 #ifdef not_used 107 static void vm_object_deactivate_pages __P((vm_object_t)); 108 #endif 109 static void vm_object_terminate __P((vm_object_t)); 110 static void vm_object_cache_trim __P((void)); 111 112 /* 113 * Virtual memory objects maintain the actual data 114 * associated with allocated virtual memory. A given 115 * page of memory exists within exactly one object. 116 * 117 * An object is only deallocated when all "references" 118 * are given up. Only one "reference" to a given 119 * region of an object should be writeable. 120 * 121 * Associated with each object is a list of all resident 122 * memory pages belonging to that object; this list is 123 * maintained by the "vm_page" module, and locked by the object's 124 * lock. 125 * 126 * Each object also records a "pager" routine which is 127 * used to retrieve (and store) pages to the proper backing 128 * storage. 
In addition, objects may be backed by other
 *	objects from which they were virtual-copied (the "shadow" chain).
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */

/* Maximum number of unreferenced objects allowed to persist on the cache. */
int vm_object_cache_max;
/* Queue of zero-reference, OBJ_CANPERSIST objects kept for quick revival. */
struct object_q vm_object_cached_list;
/* Current length of vm_object_cached_list. */
static int vm_object_cached;
/* Global list of every allocated object in the system. */
struct object_q vm_object_list;
/* Count of entries on vm_object_list. */
static long vm_object_count;
vm_object_t kernel_object;	/* object backing the kernel's address range */
vm_object_t kmem_object;	/* object backing kmem submap allocations */
/* Static storage for the two boot-time objects (needed before malloc works). */
static struct vm_object kernel_object_store;
static struct vm_object kmem_object_store;
extern int vm_pageout_page_count;

/* Statistics updated by vm_object_collapse(): full collapses vs. bypasses. */
static long object_collapses;
static long object_bypasses;

/*
 * _vm_object_allocate:
 *
 *	Initialize caller-supplied object storage as a fresh object of
 *	the given type and size with a single reference, and link it
 *	onto the global object list.  Shared by vm_object_allocate()
 *	and the boot-time initialization of the static objects.
 */
static void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	register vm_object_t object;
{
	TAILQ_INIT(&object->memq);
	TAILQ_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;		/* the caller's reference */
	object->flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->handle = NULL;
	object->paging_offset = (vm_ooffset_t) 0;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;

	object->last_read = 0;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
}

/*
 * vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init()
{
	TAILQ_INIT(&vm_object_cached_list);
	TAILQ_INIT(&vm_object_list);
	vm_object_count = 0;

	/*
	 * Scale the persistent-object cache with physical memory:
	 * 84 entries minimum, plus one per three pages beyond 1000.
	 */
	vm_object_cache_max = 84;
	if (cnt.v_page_count > 1000)
		vm_object_cache_max += (cnt.v_page_count - 1000) / 3;

	/*
	 * The kernel and kmem objects use statically-allocated storage
	 * because they are needed before malloc() is usable.
	 */
	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kmem_object);
}

/*
 * vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	register vm_object_t result;

	/* M_WAITOK: malloc sleeps until memory is available, never NULL. */
	result = (vm_object_t)
	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);


	_vm_object_allocate(type, size, result);

	return (result);
}


/*
 * vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
inline void
vm_object_reference(object)
	register vm_object_t object;
{
	if (object == NULL)
		return;

	/*
	 * A zero ref_count here means the object is resting on the
	 * cached-object list (only legal for persistent objects);
	 * reviving it requires removing it from that list first.
	 */
	if (object->ref_count == 0) {
		if ((object->flags & OBJ_CANPERSIST) == 0)
			panic("vm_object_reference: non-persistent object with 0 ref_count");
		TAILQ_REMOVE(&vm_object_cached_list, object, cached_list);
		vm_object_cached--;
	}
	object->ref_count++;
}

/*
 * vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
257 */ 258 void 259 vm_object_deallocate(object) 260 vm_object_t object; 261 { 262 vm_object_t temp; 263 264 while (object != NULL) { 265 266 if (object->ref_count == 0) 267 panic("vm_object_deallocate: object deallocated too many times"); 268 269 /* 270 * Lose the reference 271 */ 272 object->ref_count--; 273 274 if (object->ref_count != 0) { 275 if ((object->ref_count == 1) && 276 (object->handle == NULL) && 277 (object->type == OBJT_DEFAULT || 278 object->type == OBJT_SWAP)) { 279 vm_object_t robject; 280 robject = object->shadow_head.tqh_first; 281 if ((robject != NULL) && 282 (robject->handle == NULL) && 283 (robject->type == OBJT_DEFAULT || 284 robject->type == OBJT_SWAP)) { 285 int s; 286 robject->ref_count += 2; 287 object->ref_count += 2; 288 289 do { 290 s = splhigh(); 291 while (robject->paging_in_progress) { 292 robject->flags |= OBJ_PIPWNT; 293 tsleep(robject, PVM, "objde1", 0); 294 } 295 296 while (object->paging_in_progress) { 297 object->flags |= OBJ_PIPWNT; 298 tsleep(object, PVM, "objde2", 0); 299 } 300 splx(s); 301 302 } while( object->paging_in_progress || robject->paging_in_progress); 303 304 object->ref_count -= 2; 305 robject->ref_count -= 2; 306 if( robject->ref_count == 0) { 307 robject->ref_count += 1; 308 object = robject; 309 continue; 310 } 311 vm_object_collapse(robject); 312 return; 313 } 314 } 315 /* 316 * If there are still references, then we are done. 317 */ 318 return; 319 } 320 321 if (object->type == OBJT_VNODE) { 322 struct vnode *vp = object->handle; 323 324 vp->v_flag &= ~VTEXT; 325 } 326 327 /* 328 * See if this object can persist and has some resident 329 * pages. If so, enter it in the cache. 
330 */ 331 if (object->flags & OBJ_CANPERSIST) { 332 if (object->resident_page_count != 0) { 333 vm_object_page_clean(object, 0, 0 ,TRUE, TRUE); 334 TAILQ_INSERT_TAIL(&vm_object_cached_list, object, 335 cached_list); 336 vm_object_cached++; 337 338 vm_object_cache_trim(); 339 return; 340 } else { 341 object->flags &= ~OBJ_CANPERSIST; 342 } 343 } 344 345 /* 346 * Make sure no one uses us. 347 */ 348 object->flags |= OBJ_DEAD; 349 350 temp = object->backing_object; 351 if (temp) 352 TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); 353 vm_object_terminate(object); 354 /* unlocks and deallocates object */ 355 object = temp; 356 } 357 } 358 359 /* 360 * vm_object_terminate actually destroys the specified object, freeing 361 * up all previously used resources. 362 * 363 * The object must be locked. 364 */ 365 static void 366 vm_object_terminate(object) 367 register vm_object_t object; 368 { 369 register vm_page_t p; 370 int s; 371 372 /* 373 * wait for the pageout daemon to be done with the object 374 */ 375 s = splhigh(); 376 while (object->paging_in_progress) { 377 object->flags |= OBJ_PIPWNT; 378 tsleep(object, PVM, "objtrm", 0); 379 } 380 splx(s); 381 382 if (object->paging_in_progress != 0) 383 panic("vm_object_deallocate: pageout in progress"); 384 385 /* 386 * Clean and free the pages, as appropriate. All references to the 387 * object are gone, so we don't need to lock it. 388 */ 389 if (object->type == OBJT_VNODE) { 390 struct vnode *vp = object->handle; 391 392 VOP_LOCK(vp); 393 vm_object_page_clean(object, 0, 0, TRUE, FALSE); 394 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 395 VOP_UNLOCK(vp); 396 } 397 398 /* 399 * Now free the pages. For internal objects, this also removes them 400 * from paging queues. 
401 */ 402 while ((p = object->memq.tqh_first) != NULL) { 403 if (p->flags & PG_BUSY) 404 printf("vm_object_terminate: freeing busy page\n"); 405 PAGE_WAKEUP(p); 406 vm_page_free(p); 407 cnt.v_pfree++; 408 } 409 410 /* 411 * Let the pager know object is dead. 412 */ 413 vm_pager_deallocate(object); 414 415 TAILQ_REMOVE(&vm_object_list, object, object_list); 416 vm_object_count--; 417 418 wakeup(object); 419 420 /* 421 * Free the space for the object. 422 */ 423 free((caddr_t) object, M_VMOBJ); 424 } 425 426 /* 427 * vm_object_page_clean 428 * 429 * Clean all dirty pages in the specified range of object. 430 * Leaves page on whatever queue it is currently on. 431 * 432 * Odd semantics: if start == end, we clean everything. 433 * 434 * The object must be locked. 435 */ 436 437 void 438 vm_object_page_clean(object, start, end, syncio, lockflag) 439 vm_object_t object; 440 vm_pindex_t start; 441 vm_pindex_t end; 442 boolean_t syncio; 443 boolean_t lockflag; 444 { 445 register vm_page_t p, np, tp; 446 register vm_offset_t tstart, tend; 447 vm_pindex_t pi; 448 int s; 449 struct vnode *vp; 450 int runlen; 451 int maxf; 452 int chkb; 453 int maxb; 454 int i; 455 vm_page_t maf[vm_pageout_page_count]; 456 vm_page_t mab[vm_pageout_page_count]; 457 vm_page_t ma[vm_pageout_page_count]; 458 459 if (object->type != OBJT_VNODE || 460 (object->flags & OBJ_MIGHTBEDIRTY) == 0) 461 return; 462 463 vp = object->handle; 464 465 if (lockflag) 466 VOP_LOCK(vp); 467 object->flags |= OBJ_CLEANING; 468 469 tstart = start; 470 if (end == 0) { 471 tend = object->size; 472 } else { 473 tend = end; 474 } 475 if ((tstart == 0) && (tend == object->size)) { 476 object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 477 } 478 for(p = object->memq.tqh_first; p; p = p->listq.tqe_next) 479 p->flags |= PG_CLEANCHK; 480 481 rescan: 482 for(p = object->memq.tqh_first; p; p = np) { 483 np = p->listq.tqe_next; 484 485 pi = p->pindex; 486 if (((p->flags & PG_CLEANCHK) == 0) || 487 (pi < tstart) || (pi >= tend) 
|| 488 (p->valid == 0) || (p->queue == PQ_CACHE)) { 489 p->flags &= ~PG_CLEANCHK; 490 continue; 491 } 492 493 vm_page_test_dirty(p); 494 if ((p->dirty & p->valid) == 0) { 495 p->flags &= ~PG_CLEANCHK; 496 continue; 497 } 498 499 s = splhigh(); 500 if ((p->flags & PG_BUSY) || p->busy) { 501 p->flags |= PG_WANTED|PG_REFERENCED; 502 tsleep(p, PVM, "vpcwai", 0); 503 splx(s); 504 goto rescan; 505 } 506 splx(s); 507 508 maxf = 0; 509 for(i=1;i<vm_pageout_page_count;i++) { 510 if (tp = vm_page_lookup(object, pi + i)) { 511 if ((tp->flags & PG_BUSY) || 512 (tp->flags & PG_CLEANCHK) == 0) 513 break; 514 vm_page_test_dirty(tp); 515 if ((tp->dirty & tp->valid) == 0) { 516 tp->flags &= ~PG_CLEANCHK; 517 break; 518 } 519 maf[ i - 1 ] = tp; 520 maxf++; 521 continue; 522 } 523 break; 524 } 525 526 maxb = 0; 527 chkb = vm_pageout_page_count - maxf; 528 if (chkb) { 529 for(i = 1; i < chkb;i++) { 530 if (tp = vm_page_lookup(object, pi - i)) { 531 if ((tp->flags & PG_BUSY) || 532 (tp->flags & PG_CLEANCHK) == 0) 533 break; 534 vm_page_test_dirty(tp); 535 if ((tp->dirty & tp->valid) == 0) { 536 tp->flags &= ~PG_CLEANCHK; 537 break; 538 } 539 mab[ i - 1 ] = tp; 540 maxb++; 541 continue; 542 } 543 break; 544 } 545 } 546 547 for(i=0;i<maxb;i++) { 548 int index = (maxb - i) - 1; 549 ma[index] = mab[i]; 550 ma[index]->flags |= PG_BUSY; 551 ma[index]->flags &= ~PG_CLEANCHK; 552 vm_page_protect(ma[index], VM_PROT_READ); 553 } 554 vm_page_protect(p, VM_PROT_READ); 555 p->flags |= PG_BUSY; 556 p->flags &= ~PG_CLEANCHK; 557 ma[maxb] = p; 558 for(i=0;i<maxf;i++) { 559 int index = (maxb + i) + 1; 560 ma[index] = maf[i]; 561 ma[index]->flags |= PG_BUSY; 562 ma[index]->flags &= ~PG_CLEANCHK; 563 vm_page_protect(ma[index], VM_PROT_READ); 564 } 565 runlen = maxb + maxf + 1; 566 /* 567 printf("maxb: %d, maxf: %d, runlen: %d, offset: %d\n", maxb, maxf, runlen, ma[0]->pindex); 568 */ 569 vm_pageout_flush(ma, runlen, 0); 570 goto rescan; 571 } 572 573 VOP_FSYNC(vp, NULL, syncio, curproc); 574 575 if 
(lockflag) 576 VOP_UNLOCK(vp); 577 object->flags &= ~OBJ_CLEANING; 578 return; 579 } 580 581 #ifdef not_used 582 /* XXX I cannot tell if this should be an exported symbol */ 583 /* 584 * vm_object_deactivate_pages 585 * 586 * Deactivate all pages in the specified object. (Keep its pages 587 * in memory even though it is no longer referenced.) 588 * 589 * The object must be locked. 590 */ 591 static void 592 vm_object_deactivate_pages(object) 593 register vm_object_t object; 594 { 595 register vm_page_t p, next; 596 597 for (p = object->memq.tqh_first; p != NULL; p = next) { 598 next = p->listq.tqe_next; 599 vm_page_deactivate(p); 600 } 601 } 602 #endif 603 604 /* 605 * Trim the object cache to size. 606 */ 607 static void 608 vm_object_cache_trim() 609 { 610 register vm_object_t object; 611 612 while (vm_object_cached > vm_object_cache_max) { 613 object = vm_object_cached_list.tqh_first; 614 615 vm_object_reference(object); 616 pager_cache(object, FALSE); 617 } 618 } 619 620 621 /* 622 * vm_object_pmap_copy: 623 * 624 * Makes all physical pages in the specified 625 * object range copy-on-write. No writeable 626 * references to these pages should remain. 627 * 628 * The object must *not* be locked. 629 */ 630 void 631 vm_object_pmap_copy(object, start, end) 632 register vm_object_t object; 633 register vm_pindex_t start; 634 register vm_pindex_t end; 635 { 636 register vm_page_t p; 637 638 if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) 639 return; 640 641 for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { 642 vm_page_protect(p, VM_PROT_READ); 643 } 644 645 object->flags &= ~OBJ_WRITEABLE; 646 } 647 648 /* 649 * vm_object_pmap_remove: 650 * 651 * Removes all physical pages in the specified 652 * object range from all physical maps. 653 * 654 * The object must *not* be locked. 
 */
void
vm_object_pmap_remove(object, start, end)
	register vm_object_t object;
	register vm_pindex_t start;
	register vm_pindex_t end;
{
	register vm_page_t p;

	if (object == NULL)
		return;
	/* Linear scan: only pages inside [start, end) are unmapped. */
	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
		if (p->pindex >= start && p->pindex < end)
			vm_page_protect(p, VM_PROT_NONE);
	}
}

/*
 *	vm_object_copy:
 *
 *	Create a new object which is a copy of an existing
 *	object, and mark all of the pages in the existing
 *	object 'copy-on-write'.  The new object has one reference.
 *	Returns the new object.
 *
 *	May defer the copy until later if the object is not backed
 *	up by a non-default pager.
 */
void
vm_object_copy(src_object, src_offset,
    dst_object, dst_offset, src_needs_copy)
	register vm_object_t src_object;
	vm_pindex_t src_offset;
	vm_object_t *dst_object;	/* OUT */
	vm_pindex_t *dst_offset;	/* OUT */
	boolean_t *src_needs_copy;	/* OUT */
{
	if (src_object == NULL) {
		/*
		 * Nothing to copy
		 */
		*dst_object = NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 * Try to collapse the object before copying it.
	 */
	if (src_object->handle == NULL &&
	    (src_object->type == OBJT_DEFAULT ||
	     src_object->type == OBJT_SWAP))
		vm_object_collapse(src_object);


	/*
	 * Make another reference to the object.  The copy itself is
	 * always deferred: the caller simply shares the source object
	 * and is told (below) to shadow it before writing.
	 */
	src_object->ref_count++;

	*dst_object = src_object;
	*dst_offset = src_offset;

	/*
	 * Must make a shadow when write is desired
	 */
	*src_needs_copy = TRUE;
	return;
}

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */

void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_ooffset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	register vm_object_t source;
	register vm_object_t result;

	source = *object;

	/*
	 * Allocate a new object with the given length
	 * (vm_object_allocate uses M_WAITOK, so the NULL check is
	 * belt-and-suspenders).
	 */

	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 */
	result->backing_object = source;
	if (source)
		TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list);

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */

	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */

	*offset = 0;
	*object = result;
}


/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
 */
static void
vm_object_qcollapse(object)
	register vm_object_t object;
{
	register vm_object_t backing_object;
	register vm_pindex_t backing_offset_index, paging_offset_index;
	vm_pindex_t backing_object_paging_offset_index;
	vm_pindex_t new_pindex;
	register vm_page_t p, pp;
	register vm_size_t size;

	/*
	 * Caller (vm_object_collapse) guarantees backing_object != NULL.
	 * Only safe when we hold the sole reference to the backer.
	 */
	backing_object = object->backing_object;
	if (backing_object->ref_count != 1)
		return;

	/* Pin the backing object so it cannot be freed mid-scan. */
	backing_object->ref_count += 2;

	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
	backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset);
	paging_offset_index = OFF_TO_IDX(object->paging_offset);
	size = object->size;
	p = backing_object->memq.tqh_first;
	while (p) {
		vm_page_t next;

		next = p->listq.tqe_next;
		/* Skip pages that are busy, pinned, or otherwise in flux. */
		if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) ||
		    (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) {
			p = next;
			continue;
		}
		vm_page_protect(p, VM_PROT_NONE);
		new_pindex = p->pindex - backing_offset_index;
		if (p->pindex < backing_offset_index ||
		    new_pindex >= size) {
			/*
			 * Page falls outside the region the parent maps;
			 * it can never be needed again.  Release its swap
			 * space and free it.
			 */
			if (backing_object->type == OBJT_SWAP)
				swap_pager_freespace(backing_object,
				    backing_object_paging_offset_index+p->pindex,
				    1);
			vm_page_free(p);
		} else {
			pp = vm_page_lookup(object, new_pindex);
			if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
			    paging_offset_index + new_pindex, NULL, NULL))) {
				/*
				 * The parent already shadows this page
				 * (resident or paged out); the backing copy
				 * is dead.
				 */
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object_paging_offset_index + p->pindex, 1);
				vm_page_free(p);
			} else {
				/*
				 * Move the page up into the parent.  Its swap
				 * copy (tied to the backing object) is stale,
				 * so mark the page fully dirty to force a
				 * rewrite if it is ever paged out again.
				 */
				if (backing_object->type == OBJT_SWAP)
					swap_pager_freespace(backing_object,
					    backing_object_paging_offset_index + p->pindex, 1);
				vm_page_rename(p, object, new_pindex);
				p->dirty = VM_PAGE_BITS_ALL;
			}
		}
		p = next;
	}
	backing_object->ref_count -= 2;
}

/*
* vm_object_collapse: 848 * 849 * Collapse an object with the object backing it. 850 * Pages in the backing object are moved into the 851 * parent, and the backing object is deallocated. 852 */ 853 void 854 vm_object_collapse(object) 855 vm_object_t object; 856 857 { 858 vm_object_t backing_object; 859 vm_ooffset_t backing_offset; 860 vm_size_t size; 861 vm_pindex_t new_pindex, backing_offset_index; 862 vm_page_t p, pp; 863 864 while (TRUE) { 865 /* 866 * Verify that the conditions are right for collapse: 867 * 868 * The object exists and no pages in it are currently being paged 869 * out. 870 */ 871 if (object == NULL) 872 return; 873 874 /* 875 * Make sure there is a backing object. 876 */ 877 if ((backing_object = object->backing_object) == NULL) 878 return; 879 880 /* 881 * we check the backing object first, because it is most likely 882 * not collapsable. 883 */ 884 if (backing_object->handle != NULL || 885 (backing_object->type != OBJT_DEFAULT && 886 backing_object->type != OBJT_SWAP) || 887 (backing_object->flags & OBJ_DEAD) || 888 object->handle != NULL || 889 (object->type != OBJT_DEFAULT && 890 object->type != OBJT_SWAP) || 891 (object->flags & OBJ_DEAD)) { 892 return; 893 } 894 895 if (object->paging_in_progress != 0 || 896 backing_object->paging_in_progress != 0) { 897 vm_object_qcollapse(object); 898 return; 899 } 900 901 /* 902 * We know that we can either collapse the backing object (if 903 * the parent is the only reference to it) or (perhaps) remove 904 * the parent's reference to it. 905 */ 906 907 backing_offset = object->backing_object_offset; 908 backing_offset_index = OFF_TO_IDX(backing_offset); 909 size = object->size; 910 911 /* 912 * If there is exactly one reference to the backing object, we 913 * can collapse it into the parent. 914 */ 915 916 if (backing_object->ref_count == 1) { 917 918 backing_object->flags |= OBJ_DEAD; 919 /* 920 * We can collapse the backing object. 
921 * 922 * Move all in-memory pages from backing_object to the 923 * parent. Pages that have been paged out will be 924 * overwritten by any of the parent's pages that 925 * shadow them. 926 */ 927 928 while ((p = backing_object->memq.tqh_first) != 0) { 929 930 new_pindex = p->pindex - backing_offset_index; 931 932 /* 933 * If the parent has a page here, or if this 934 * page falls outside the parent, dispose of 935 * it. 936 * 937 * Otherwise, move it as planned. 938 */ 939 940 if (p->pindex < backing_offset_index || 941 new_pindex >= size) { 942 vm_page_protect(p, VM_PROT_NONE); 943 PAGE_WAKEUP(p); 944 vm_page_free(p); 945 } else { 946 pp = vm_page_lookup(object, new_pindex); 947 if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, 948 OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) { 949 vm_page_protect(p, VM_PROT_NONE); 950 PAGE_WAKEUP(p); 951 vm_page_free(p); 952 } else { 953 vm_page_rename(p, object, new_pindex); 954 } 955 } 956 } 957 958 /* 959 * Move the pager from backing_object to object. 960 */ 961 962 if (backing_object->type == OBJT_SWAP) { 963 backing_object->paging_in_progress++; 964 if (object->type == OBJT_SWAP) { 965 object->paging_in_progress++; 966 /* 967 * copy shadow object pages into ours 968 * and destroy unneeded pages in 969 * shadow object. 970 */ 971 swap_pager_copy( 972 backing_object, 973 OFF_TO_IDX(backing_object->paging_offset), 974 object, 975 OFF_TO_IDX(object->paging_offset), 976 OFF_TO_IDX(object->backing_object_offset)); 977 vm_object_pip_wakeup(object); 978 } else { 979 object->paging_in_progress++; 980 /* 981 * move the shadow backing_object's pager data to 982 * "object" and convert "object" type to OBJT_SWAP. 
983 */ 984 object->type = OBJT_SWAP; 985 object->un_pager.swp.swp_nblocks = 986 backing_object->un_pager.swp.swp_nblocks; 987 object->un_pager.swp.swp_allocsize = 988 backing_object->un_pager.swp.swp_allocsize; 989 object->un_pager.swp.swp_blocks = 990 backing_object->un_pager.swp.swp_blocks; 991 object->un_pager.swp.swp_poip = /* XXX */ 992 backing_object->un_pager.swp.swp_poip; 993 object->paging_offset = backing_object->paging_offset + backing_offset; 994 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); 995 996 /* 997 * Convert backing object from OBJT_SWAP to 998 * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is 999 * actually necessary. 1000 */ 1001 backing_object->type = OBJT_DEFAULT; 1002 TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list); 1003 /* 1004 * free unnecessary blocks 1005 */ 1006 swap_pager_freespace(object, 0, 1007 OFF_TO_IDX(object->paging_offset)); 1008 vm_object_pip_wakeup(object); 1009 } 1010 1011 vm_object_pip_wakeup(backing_object); 1012 } 1013 /* 1014 * Object now shadows whatever backing_object did. 1015 * Note that the reference to backing_object->backing_object 1016 * moves from within backing_object to within object. 1017 */ 1018 1019 TAILQ_REMOVE(&object->backing_object->shadow_head, object, 1020 shadow_list); 1021 if (backing_object->backing_object) 1022 TAILQ_REMOVE(&backing_object->backing_object->shadow_head, 1023 backing_object, shadow_list); 1024 object->backing_object = backing_object->backing_object; 1025 if (object->backing_object) 1026 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 1027 object, shadow_list); 1028 1029 object->backing_object_offset += backing_object->backing_object_offset; 1030 /* 1031 * Discard backing_object. 1032 * 1033 * Since the backing object has no pages, no pager left, 1034 * and no object references within it, all that is 1035 * necessary is to dispose of it. 
1036 */ 1037 1038 TAILQ_REMOVE(&vm_object_list, backing_object, 1039 object_list); 1040 vm_object_count--; 1041 1042 free((caddr_t) backing_object, M_VMOBJ); 1043 1044 object_collapses++; 1045 } else { 1046 /* 1047 * If all of the pages in the backing object are 1048 * shadowed by the parent object, the parent object no 1049 * longer has to shadow the backing object; it can 1050 * shadow the next one in the chain. 1051 * 1052 * The backing object must not be paged out - we'd have 1053 * to check all of the paged-out pages, as well. 1054 */ 1055 1056 if (backing_object->type != OBJT_DEFAULT) { 1057 return; 1058 } 1059 /* 1060 * Should have a check for a 'small' number of pages 1061 * here. 1062 */ 1063 1064 for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) { 1065 new_pindex = p->pindex - backing_offset_index; 1066 1067 /* 1068 * If the parent has a page here, or if this 1069 * page falls outside the parent, keep going. 1070 * 1071 * Otherwise, the backing_object must be left in 1072 * the chain. 1073 */ 1074 1075 if (p->pindex >= backing_offset_index && 1076 new_pindex <= size) { 1077 1078 pp = vm_page_lookup(object, new_pindex); 1079 1080 if ((pp == NULL || pp->valid == 0) && 1081 !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) { 1082 /* 1083 * Page still needed. Can't go any 1084 * further. 1085 */ 1086 return; 1087 } 1088 } 1089 } 1090 1091 /* 1092 * Make the parent shadow the next object in the 1093 * chain. Deallocating backing_object will not remove 1094 * it, since its reference count is at least 2. 
1095 */ 1096 1097 TAILQ_REMOVE(&object->backing_object->shadow_head, 1098 object, shadow_list); 1099 vm_object_reference(object->backing_object = backing_object->backing_object); 1100 if (object->backing_object) 1101 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 1102 object, shadow_list); 1103 object->backing_object_offset += backing_object->backing_object_offset; 1104 1105 /* 1106 * Drop the reference count on backing_object. Since 1107 * its ref_count was at least 2, it will not vanish; 1108 * so we don't need to call vm_object_deallocate. 1109 */ 1110 if (backing_object->ref_count == 1) 1111 printf("should have called obj deallocate\n"); 1112 backing_object->ref_count--; 1113 1114 object_bypasses++; 1115 1116 } 1117 1118 /* 1119 * Try again with this object's new backing object. 1120 */ 1121 } 1122 } 1123 1124 /* 1125 * vm_object_page_remove: [internal] 1126 * 1127 * Removes all physical pages in the specified 1128 * object range from the object's list of pages. 1129 * 1130 * The object must be locked. 
 */
void
vm_object_page_remove(object, start, end, clean_only)
	register vm_object_t object;
	register vm_pindex_t start;
	register vm_pindex_t end;
	boolean_t clean_only;
{
	register vm_page_t p, next;
	unsigned int size;
	int s;

	if (object == NULL)
		return;

	/* Hold off collapses etc. while we manipulate the page list. */
	object->paging_in_progress++;
again:
	size = end - start;
	/*
	 * For large removals a single walk of the resident-page list is
	 * cheaper than per-index lookups; for small ones, look up each
	 * index directly.
	 */
	if (size > 4 || size >= object->size / 4) {
		for (p = object->memq.tqh_first; p != NULL; p = next) {
			next = p->listq.tqe_next;
			/*
			 * Wired pages cannot be freed; unmap and invalidate
			 * them instead.
			 * NOTE(review): this branch runs before the range
			 * check below, so it also invalidates wired pages
			 * OUTSIDE [start, end) — looks unintended; confirm
			 * against callers before relying on it.
			 */
			if (p->wire_count != 0) {
				vm_page_protect(p, VM_PROT_NONE);
				p->valid = 0;
				continue;
			}
			if ((start <= p->pindex) && (p->pindex < end)) {
				/* Wait out busy pages; list may change, rescan. */
				s = splhigh();
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;	/* keep dirty pages */
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			if ((p = vm_page_lookup(object, start)) != 0) {
				/* Wired: unmap/invalidate but do not free. */
				if (p->wire_count != 0) {
					p->valid = 0;
					vm_page_protect(p, VM_PROT_NONE);
					start += 1;
					size -= 1;
					continue;
				}
				s = splhigh();
				if ((p->flags & PG_BUSY) || p->busy) {
					p->flags |= PG_WANTED;
					tsleep(p, PVM, "vmopar", 0);
					splx(s);
					goto again;
				}
				splx(s);
				if (clean_only) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty) {
						start += 1;
						size -= 1;
						continue;	/* keep dirty pages */
					}
				}
				vm_page_protect(p, VM_PROT_NONE);
				PAGE_WAKEUP(p);
				vm_page_free(p);
			}
			start += 1;
			size -= 1;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
1217 * 1218 * returns TRUE if objects were combined. 1219 * 1220 * NOTE: Only works at the moment if the second object is NULL - 1221 * if it's not, which object do we lock first? 1222 * 1223 * Parameters: 1224 * prev_object First object to coalesce 1225 * prev_offset Offset into prev_object 1226 * next_object Second object into coalesce 1227 * next_offset Offset into next_object 1228 * 1229 * prev_size Size of reference to prev_object 1230 * next_size Size of reference to next_object 1231 * 1232 * Conditions: 1233 * The object must *not* be locked. 1234 */ 1235 boolean_t 1236 vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) 1237 register vm_object_t prev_object; 1238 vm_pindex_t prev_pindex; 1239 vm_size_t prev_size, next_size; 1240 { 1241 vm_size_t newsize; 1242 1243 if (prev_object == NULL) { 1244 return (TRUE); 1245 } 1246 1247 /* 1248 * Try to collapse the object first 1249 */ 1250 vm_object_collapse(prev_object); 1251 1252 /* 1253 * Can't coalesce if: . more than one reference . paged out . shadows 1254 * another object . has a copy elsewhere (any of which mean that the 1255 * pages not mapped to prev_entry may be in use anyway) 1256 */ 1257 1258 if (prev_object->ref_count > 1 || 1259 prev_object->type != OBJT_DEFAULT || 1260 prev_object->backing_object != NULL) { 1261 return (FALSE); 1262 } 1263 1264 prev_size >>= PAGE_SHIFT; 1265 next_size >>= PAGE_SHIFT; 1266 /* 1267 * Remove any pages that may still be in the object from a previous 1268 * deallocation. 1269 */ 1270 1271 vm_object_page_remove(prev_object, 1272 prev_pindex + prev_size, 1273 prev_pindex + prev_size + next_size, FALSE); 1274 1275 /* 1276 * Extend the object if necessary. 
1277 */ 1278 newsize = prev_pindex + prev_size + next_size; 1279 if (newsize > prev_object->size) 1280 prev_object->size = newsize; 1281 1282 return (TRUE); 1283 } 1284 1285 #ifdef DDB 1286 1287 static int 1288 _vm_object_in_map(map, object, entry) 1289 vm_map_t map; 1290 vm_object_t object; 1291 vm_map_entry_t entry; 1292 { 1293 vm_map_t tmpm; 1294 vm_map_entry_t tmpe; 1295 vm_object_t obj; 1296 int entcount; 1297 1298 if (map == 0) 1299 return 0; 1300 1301 if (entry == 0) { 1302 tmpe = map->header.next; 1303 entcount = map->nentries; 1304 while (entcount-- && (tmpe != &map->header)) { 1305 if( _vm_object_in_map(map, object, tmpe)) { 1306 return 1; 1307 } 1308 tmpe = tmpe->next; 1309 } 1310 } else if (entry->is_sub_map || entry->is_a_map) { 1311 tmpm = entry->object.share_map; 1312 tmpe = tmpm->header.next; 1313 entcount = tmpm->nentries; 1314 while (entcount-- && tmpe != &tmpm->header) { 1315 if( _vm_object_in_map(tmpm, object, tmpe)) { 1316 return 1; 1317 } 1318 tmpe = tmpe->next; 1319 } 1320 } else if (obj = entry->object.vm_object) { 1321 for(; obj; obj=obj->backing_object) 1322 if( obj == object) { 1323 return 1; 1324 } 1325 } 1326 return 0; 1327 } 1328 1329 static int 1330 vm_object_in_map( object) 1331 vm_object_t object; 1332 { 1333 struct proc *p; 1334 for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { 1335 if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) 1336 continue; 1337 /* 1338 if (p->p_stat != SRUN && p->p_stat != SSLEEP) { 1339 continue; 1340 } 1341 */ 1342 if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) 1343 return 1; 1344 } 1345 if( _vm_object_in_map( kernel_map, object, 0)) 1346 return 1; 1347 if( _vm_object_in_map( kmem_map, object, 0)) 1348 return 1; 1349 if( _vm_object_in_map( pager_map, object, 0)) 1350 return 1; 1351 if( _vm_object_in_map( buffer_map, object, 0)) 1352 return 1; 1353 if( _vm_object_in_map( io_map, object, 0)) 1354 return 1; 1355 if( _vm_object_in_map( phys_map, object, 0)) 1356 return 1; 1357 
if( _vm_object_in_map( mb_map, object, 0)) 1358 return 1; 1359 if( _vm_object_in_map( u_map, object, 0)) 1360 return 1; 1361 return 0; 1362 } 1363 1364 1365 #ifdef DDB 1366 static void 1367 DDB_vm_object_check() 1368 { 1369 vm_object_t object; 1370 1371 /* 1372 * make sure that internal objs are in a map somewhere 1373 * and none have zero ref counts. 1374 */ 1375 for (object = vm_object_list.tqh_first; 1376 object != NULL; 1377 object = object->object_list.tqe_next) { 1378 if (object->handle == NULL && 1379 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 1380 if (object->ref_count == 0) { 1381 printf("vmochk: internal obj has zero ref count: %d\n", 1382 object->size); 1383 } 1384 if (!vm_object_in_map(object)) { 1385 printf("vmochk: internal obj is not in a map: " 1386 "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n", 1387 object->ref_count, object->size, 1388 object->size, object->backing_object); 1389 } 1390 } 1391 } 1392 } 1393 #endif /* DDB */ 1394 1395 /* 1396 * vm_object_print: [ debug ] 1397 */ 1398 void 1399 vm_object_print(iobject, full, dummy3, dummy4) 1400 /* db_expr_t */ int iobject; 1401 boolean_t full; 1402 /* db_expr_t */ int dummy3; 1403 char *dummy4; 1404 { 1405 vm_object_t object = (vm_object_t)iobject; /* XXX */ 1406 register vm_page_t p; 1407 1408 register int count; 1409 1410 if (object == NULL) 1411 return; 1412 1413 iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", 1414 (int) object, (int) object->size, 1415 object->resident_page_count, object->ref_count); 1416 printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", 1417 (int) object->paging_offset, 1418 (int) object->backing_object, (int) object->backing_object_offset); 1419 printf("cache: next=%p, prev=%p\n", 1420 object->cached_list.tqe_next, object->cached_list.tqe_prev); 1421 1422 if (!full) 1423 return; 1424 1425 indent += 2; 1426 count = 0; 1427 for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { 1428 if (count == 0) 1429 iprintf("memory:="); 1430 else 
if (count == 6) { 1431 printf("\n"); 1432 iprintf(" ..."); 1433 count = 0; 1434 } else 1435 printf(","); 1436 count++; 1437 1438 printf("(off=0x%lx,page=0x%lx)", 1439 (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); 1440 } 1441 if (count != 0) 1442 printf("\n"); 1443 indent -= 2; 1444 } 1445 #endif /* DDB */ 1446