1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $FreeBSD$ 65 */ 66 67 /* 68 * Virtual memory object module. 
69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/lock.h> 74 #include <sys/mman.h> 75 #include <sys/mount.h> 76 #include <sys/mutex.h> 77 #include <sys/proc.h> /* for curproc, pageproc */ 78 #include <sys/socket.h> 79 #include <sys/vnode.h> 80 #include <sys/vmmeter.h> 81 #include <sys/sx.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_param.h> 85 #include <vm/pmap.h> 86 #include <vm/vm_map.h> 87 #include <vm/vm_object.h> 88 #include <vm/vm_page.h> 89 #include <vm/vm_pageout.h> 90 #include <vm/vm_pager.h> 91 #include <vm/vm_zone.h> 92 #include <vm/swap_pager.h> 93 #include <vm/vm_kern.h> 94 #include <vm/vm_extern.h> 95 96 static void vm_object_qcollapse __P((vm_object_t object)); 97 98 /* 99 * Virtual memory objects maintain the actual data 100 * associated with allocated virtual memory. A given 101 * page of memory exists within exactly one object. 102 * 103 * An object is only deallocated when all "references" 104 * are given up. Only one "reference" to a given 105 * region of an object should be writeable. 106 * 107 * Associated with each object is a list of all resident 108 * memory pages belonging to that object; this list is 109 * maintained by the "vm_page" module, and locked by the object's 110 * lock. 111 * 112 * Each object also records a "pager" routine which is 113 * used to retrieve (and store) pages to the proper backing 114 * storage. In addition, objects may be backed by other 115 * objects from which they were virtual-copied. 116 * 117 * The only items within the object structure which are 118 * modified after time of creation are: 119 * reference count locked by object's lock 120 * pager routine locked by object's lock 121 * 122 */ 123 124 struct object_q vm_object_list; 125 static struct mtx vm_object_list_mtx; /* lock for object list and count */ 126 static long vm_object_count; /* count of all objects */ 127 vm_object_t kernel_object; 128 vm_object_t kmem_object; 129 static struct vm_object kernel_object_store; 130 static struct vm_object kmem_object_store; 131 extern int vm_pageout_page_count; 132 133 static long object_collapses; 134 static long object_bypasses; 135 static int next_index; 136 static vm_zone_t obj_zone; 137 static struct vm_zone obj_zone_store; 138 static int object_hash_rand; 139 #define VM_OBJECTS_INIT 256 140 static struct vm_object vm_objects_init[VM_OBJECTS_INIT]; 141 142 void 143 _vm_object_allocate(type, size, object) 144 objtype_t type; 145 vm_size_t size; 146 vm_object_t object; 147 { 148 int incr; 149 150 mtx_assert(&vm_mtx, MA_OWNED); 151 TAILQ_INIT(&object->memq); 152 TAILQ_INIT(&object->shadow_head); 153 154 object->type = type; 155 object->size = size; 156 object->ref_count = 1; 157 object->flags = 0; 158 if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) 159 vm_object_set_flag(object, OBJ_ONEMAPPING); 160 object->paging_in_progress = 0; 161 object->resident_page_count = 0; 162 object->shadow_count = 0; 163 object->pg_color = next_index; 164 if ( size > (PQ_L2_SIZE / 3 + PQ_PRIME1)) 165 incr = PQ_L2_SIZE / 3 + PQ_PRIME1; 166 else 167 incr = size; 168 next_index = (next_index + incr) & PQ_L2_MASK; 169 object->handle = NULL; 170 object->backing_object = NULL; 171 object->backing_object_offset = (vm_ooffset_t) 0; 172 /* 173 * Try to generate a number that will spread objects out in the 174 * hash table. We 'wipe' new objects across the hash in 128 page 175 * increments plus 1 more to offset it a little more by the time 176 * it wraps around. 
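	 *
	 * (Worked example: the decrement applied below is 129 = 128 + 1, so
	 * successive objects get hash_rand values such as 0, -129, -258, ...,
	 * i.e. they land 128-page strides apart, with the extra 1 supplying
	 * the additional offset described above.)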
177 */ 178 object->hash_rand = object_hash_rand - 129; 179 180 object->generation++; 181 182 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); 183 vm_object_count++; 184 object_hash_rand = object->hash_rand; 185 } 186 187 /* 188 * vm_object_init: 189 * 190 * Initialize the VM objects module. 191 */ 192 void 193 vm_object_init() 194 { 195 196 mtx_assert(&vm_mtx, MA_OWNED); 197 TAILQ_INIT(&vm_object_list); 198 mtx_init(&vm_object_list_mtx, "vm object_list", MTX_DEF); 199 vm_object_count = 0; 200 201 kernel_object = &kernel_object_store; 202 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 203 kernel_object); 204 205 kmem_object = &kmem_object_store; 206 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 207 kmem_object); 208 209 obj_zone = &obj_zone_store; 210 zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object), 211 vm_objects_init, VM_OBJECTS_INIT); 212 } 213 214 void 215 vm_object_init2() 216 { 217 zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1); 218 } 219 220 /* 221 * vm_object_allocate: 222 * 223 * Returns a new object with the given size. 224 */ 225 226 vm_object_t 227 vm_object_allocate(type, size) 228 objtype_t type; 229 vm_size_t size; 230 { 231 vm_object_t result; 232 233 mtx_assert(&vm_mtx, MA_OWNED); 234 result = (vm_object_t) zalloc(obj_zone); 235 _vm_object_allocate(type, size, result); 236 237 return (result); 238 } 239 240 241 /* 242 * vm_object_reference: 243 * 244 * Gets another reference to the given object. 245 */ 246 void 247 vm_object_reference(object) 248 vm_object_t object; 249 { 250 251 mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); 252 if (object == NULL) 253 return; 254 255 KASSERT(!(object->flags & OBJ_DEAD), 256 ("vm_object_reference: attempting to reference dead obj")); 257 258 object->ref_count++; 259 if (object->type == OBJT_VNODE) { 260 mtx_unlock(VM_OBJECT_MTX(object)); 261 mtx_assert(&Giant, MA_OWNED); 262 while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) { 263 printf("vm_object_reference: delay in getting object\n"); 264 } 265 mtx_lock(VM_OBJECT_MTX(object)); 266 } 267 } 268 269 /* 270 * handle deallocating a object of type OBJT_VNODE 271 * 272 * requires vm_mtx 273 * may block 274 */ 275 void 276 vm_object_vndeallocate(object) 277 vm_object_t object; 278 { 279 struct vnode *vp = (struct vnode *) object->handle; 280 281 mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); 282 KASSERT(object->type == OBJT_VNODE, 283 ("vm_object_vndeallocate: not a vnode object")); 284 KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); 285 #ifdef INVARIANTS 286 if (object->ref_count == 0) { 287 vprint("vm_object_vndeallocate", vp); 288 panic("vm_object_vndeallocate: bad object reference count"); 289 } 290 #endif 291 292 object->ref_count--; 293 if (object->ref_count == 0) { 294 vp->v_flag &= ~VTEXT; 295 vm_object_clear_flag(object, OBJ_OPT); 296 } 297 /* 298 * vrele may need a vop lock 299 */ 300 mtx_unlock(VM_OBJECT_MTX(object)); 301 mtx_assert(&Giant, MA_OWNED); 302 vrele(vp); 303 mtx_lock(VM_OBJECT_MTX(object)); 304 } 305 306 /* 307 * vm_object_deallocate: 308 * 309 * Release a reference to the specified object, 310 * gained either through a vm_object_allocate 311 * or a vm_object_reference call. When all references 312 * are gone, storage associated with this object 313 * may be relinquished. 314 * 315 * No object may be locked. 
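 *
 *	A minimal usage sketch (illustrative only, not a caller in this
 *	file): a reference taken earlier is eventually released here, e.g.
 *
 *		obj = vm_object_allocate(OBJT_DEFAULT, size);
 *		... use obj ...
 *		vm_object_deallocate(obj);
 *
 *	where "size" stands in for whatever object size the caller needs.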
316 * vm_mtx must be held 317 */ 318 void 319 vm_object_deallocate(object) 320 vm_object_t object; 321 { 322 vm_object_t temp; 323 324 mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); 325 while (object != NULL) { 326 327 if (object->type == OBJT_VNODE) { 328 vm_object_vndeallocate(object); 329 return; 330 } 331 332 KASSERT(object->ref_count != 0, 333 ("vm_object_deallocate: object deallocated too many times: %d", object->type)); 334 335 /* 336 * If the reference count goes to 0 we start calling 337 * vm_object_terminate() on the object chain. 338 * A ref count of 1 may be a special case depending on the 339 * shadow count being 0 or 1. 340 */ 341 object->ref_count--; 342 if (object->ref_count > 1) { 343 return; 344 } else if (object->ref_count == 1) { 345 if (object->shadow_count == 0) { 346 vm_object_set_flag(object, OBJ_ONEMAPPING); 347 } else if ((object->shadow_count == 1) && 348 (object->handle == NULL) && 349 (object->type == OBJT_DEFAULT || 350 object->type == OBJT_SWAP)) { 351 vm_object_t robject; 352 353 robject = TAILQ_FIRST(&object->shadow_head); 354 KASSERT(robject != NULL, 355 ("vm_object_deallocate: ref_count: %d, shadow_count: %d", 356 object->ref_count, 357 object->shadow_count)); 358 #ifdef objlocks 359 mtx_lock(VM_OBJECT_MTX(robject)); 360 #endif 361 if ((robject->handle == NULL) && 362 (robject->type == OBJT_DEFAULT || 363 robject->type == OBJT_SWAP)) { 364 365 robject->ref_count++; 366 367 while ( 368 robject->paging_in_progress || 369 object->paging_in_progress 370 ) { 371 #ifdef objlocks 372 mtx_unlock(VM_OBJECT_MTX(object)); 373 #endif 374 vm_object_pip_sleep(robject, "objde1"); 375 #ifdef objlocks 376 mtx_unlock(VM_OBJECT_MTX(robject)); 377 mtx_lock(VM_OBJECT_MTX(object)); 378 #endif 379 vm_object_pip_sleep(object, "objde2"); 380 #ifdef objlocks 381 mtx_lock(VM_OBJECT_MTX(robject)); 382 #endif 383 } 384 385 if (robject->ref_count == 1) { 386 robject->ref_count--; 387 #ifdef objlocks 388 mtx_unlock(VM_OBJECT_MTX(object)); 389 #endif 390 object = robject; 391 goto doterm; 392 } 393 394 #ifdef objlocks 395 mtx_unlock(VM_OBJECT_MTX(object)); 396 #endif 397 object = robject; 398 vm_object_collapse(object); 399 continue; 400 } 401 } 402 403 return; 404 405 } 406 407 doterm: 408 409 temp = object->backing_object; 410 if (temp) { 411 TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); 412 temp->shadow_count--; 413 if (temp->ref_count == 0) 414 vm_object_clear_flag(temp, OBJ_OPT); 415 temp->generation++; 416 object->backing_object = NULL; 417 } 418 vm_object_terminate(object); 419 /* unlocks and deallocates object */ 420 object = temp; 421 } 422 } 423 424 /* 425 * vm_object_terminate actually destroys the specified object, freeing 426 * up all previously used resources. 427 * 428 * The object must be locked. 429 * This routine may block. 430 */ 431 void 432 vm_object_terminate(object) 433 vm_object_t object; 434 { 435 vm_page_t p; 436 int s; 437 438 mtx_assert(&Giant, MA_OWNED); 439 mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); 440 /* 441 * Make sure no one uses us. 442 */ 443 vm_object_set_flag(object, OBJ_DEAD); 444 445 /* 446 * wait for the pageout daemon to be done with the object 447 */ 448 vm_object_pip_wait(object, "objtrm"); 449 450 KASSERT(!object->paging_in_progress, 451 ("vm_object_terminate: pageout in progress")); 452 453 /* 454 * Clean and free the pages, as appropriate. All references to the 455 * object are gone, so we don't need to lock it. 456 */ 457 if (object->type == OBJT_VNODE) { 458 struct vnode *vp; 459 460 /* 461 * Freeze optimized copies. 
462 */ 463 vm_freeze_copyopts(object, 0, object->size); 464 465 /* 466 * Clean pages and flush buffers. 467 */ 468 vm_object_page_clean(object, 0, 0, OBJPC_SYNC); 469 470 vp = (struct vnode *) object->handle; 471 mtx_unlock(VM_OBJECT_MTX(object)); 472 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 473 mtx_lock(VM_OBJECT_MTX(object)); 474 } 475 476 KASSERT(object->ref_count == 0, 477 ("vm_object_terminate: object with references, ref_count=%d", 478 object->ref_count)); 479 480 /* 481 * Now free any remaining pages. For internal objects, this also 482 * removes them from paging queues. Don't free wired pages, just 483 * remove them from the object. 484 */ 485 s = splvm(); 486 while ((p = TAILQ_FIRST(&object->memq)) != NULL) { 487 KASSERT(!p->busy && (p->flags & PG_BUSY) == 0, 488 ("vm_object_terminate: freeing busy page %p " 489 "p->busy = %d, p->flags %x\n", p, p->busy, p->flags)); 490 if (p->wire_count == 0) { 491 vm_page_busy(p); 492 vm_page_free(p); 493 cnt.v_pfree++; 494 } else { 495 vm_page_busy(p); 496 vm_page_remove(p); 497 } 498 } 499 splx(s); 500 501 /* 502 * Let the pager know object is dead. 503 */ 504 vm_pager_deallocate(object); 505 506 /* 507 * Remove the object from the global object list. 508 */ 509 mtx_lock(&vm_object_list_mtx); 510 TAILQ_REMOVE(&vm_object_list, object, object_list); 511 mtx_unlock(&vm_object_list_mtx); 512 513 wakeup(object); 514 515 /* 516 * Free the space for the object. 517 */ 518 zfree(obj_zone, object); 519 } 520 521 /* 522 * vm_object_page_clean 523 * 524 * Clean all dirty pages in the specified range of object. Leaves page 525 * on whatever queue it is currently on. If NOSYNC is set then do not 526 * write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC), 527 * leaving the object dirty. 528 * 529 * Odd semantics: if start == end, we clean everything. 530 * 531 * The object must be locked. 532 */ 533 534 void 535 vm_object_page_clean(object, start, end, flags) 536 vm_object_t object; 537 vm_pindex_t start; 538 vm_pindex_t end; 539 int flags; 540 { 541 vm_page_t p, np, tp; 542 vm_offset_t tstart, tend; 543 vm_pindex_t pi; 544 int s; 545 struct vnode *vp; 546 int runlen; 547 int maxf; 548 int chkb; 549 int maxb; 550 int i; 551 int clearobjflags; 552 int pagerflags; 553 vm_page_t maf[vm_pageout_page_count]; 554 vm_page_t mab[vm_pageout_page_count]; 555 vm_page_t ma[vm_pageout_page_count]; 556 int curgeneration; 557 558 mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); 559 if (object->type != OBJT_VNODE || 560 (object->flags & OBJ_MIGHTBEDIRTY) == 0) 561 return; 562 563 pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : 0; 564 pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0; 565 566 vp = object->handle; 567 568 vm_object_set_flag(object, OBJ_CLEANING); 569 570 tstart = start; 571 if (end == 0) { 572 tend = object->size; 573 } else { 574 tend = end; 575 } 576 577 /* 578 * Generally set CLEANCHK interlock and make the page read-only so 579 * we can then clear the object flags. 580 * 581 * However, if this is a nosync mmap then the object is likely to 582 * stay dirty so do not mess with the page and do not clear the 583 * object flags. 
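	 *
	 * (For illustration only: such pages normally originate from a
	 * mapping requested along the lines of
	 *
	 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_SHARED | MAP_NOSYNC, fd, 0);
	 *
	 * which is where the PG_NOSYNC marking mentioned above comes from.)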
584 */ 585 586 clearobjflags = 1; 587 588 TAILQ_FOREACH(p, &object->memq, listq) { 589 vm_page_flag_set(p, PG_CLEANCHK); 590 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) 591 clearobjflags = 0; 592 else 593 vm_page_protect(p, VM_PROT_READ); 594 } 595 596 if (clearobjflags && (tstart == 0) && (tend == object->size)) { 597 vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 598 } 599 600 rescan: 601 curgeneration = object->generation; 602 603 for(p = TAILQ_FIRST(&object->memq); p; p = np) { 604 np = TAILQ_NEXT(p, listq); 605 606 pi = p->pindex; 607 if (((p->flags & PG_CLEANCHK) == 0) || 608 (pi < tstart) || (pi >= tend) || 609 (p->valid == 0) || 610 ((p->queue - p->pc) == PQ_CACHE)) { 611 vm_page_flag_clear(p, PG_CLEANCHK); 612 continue; 613 } 614 615 vm_page_test_dirty(p); 616 if ((p->dirty & p->valid) == 0) { 617 vm_page_flag_clear(p, PG_CLEANCHK); 618 continue; 619 } 620 621 /* 622 * If we have been asked to skip nosync pages and this is a 623 * nosync page, skip it. Note that the object flags were 624 * not cleared in this case so we do not have to set them. 625 */ 626 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) { 627 vm_page_flag_clear(p, PG_CLEANCHK); 628 continue; 629 } 630 631 s = splvm(); 632 while (vm_page_sleep_busy(p, TRUE, "vpcwai")) { 633 if (object->generation != curgeneration) { 634 splx(s); 635 goto rescan; 636 } 637 } 638 639 maxf = 0; 640 for(i=1;i<vm_pageout_page_count;i++) { 641 if ((tp = vm_page_lookup(object, pi + i)) != NULL) { 642 if ((tp->flags & PG_BUSY) || 643 (tp->flags & PG_CLEANCHK) == 0 || 644 (tp->busy != 0)) 645 break; 646 if((tp->queue - tp->pc) == PQ_CACHE) { 647 vm_page_flag_clear(tp, PG_CLEANCHK); 648 break; 649 } 650 vm_page_test_dirty(tp); 651 if ((tp->dirty & tp->valid) == 0) { 652 vm_page_flag_clear(tp, PG_CLEANCHK); 653 break; 654 } 655 maf[ i - 1 ] = tp; 656 maxf++; 657 continue; 658 } 659 break; 660 } 661 662 maxb = 0; 663 chkb = vm_pageout_page_count - maxf; 664 if (chkb) { 665 for(i = 1; i < chkb;i++) { 666 if ((tp = vm_page_lookup(object, pi - i)) != NULL) { 667 if ((tp->flags & PG_BUSY) || 668 (tp->flags & PG_CLEANCHK) == 0 || 669 (tp->busy != 0)) 670 break; 671 if((tp->queue - tp->pc) == PQ_CACHE) { 672 vm_page_flag_clear(tp, PG_CLEANCHK); 673 break; 674 } 675 vm_page_test_dirty(tp); 676 if ((tp->dirty & tp->valid) == 0) { 677 vm_page_flag_clear(tp, PG_CLEANCHK); 678 break; 679 } 680 mab[ i - 1 ] = tp; 681 maxb++; 682 continue; 683 } 684 break; 685 } 686 } 687 688 for(i=0;i<maxb;i++) { 689 int index = (maxb - i) - 1; 690 ma[index] = mab[i]; 691 vm_page_flag_clear(ma[index], PG_CLEANCHK); 692 } 693 vm_page_flag_clear(p, PG_CLEANCHK); 694 ma[maxb] = p; 695 for(i=0;i<maxf;i++) { 696 int index = (maxb + i) + 1; 697 ma[index] = maf[i]; 698 vm_page_flag_clear(ma[index], PG_CLEANCHK); 699 } 700 runlen = maxb + maxf + 1; 701 702 splx(s); 703 vm_pageout_flush(ma, runlen, pagerflags); 704 for (i = 0; i<runlen; i++) { 705 if (ma[i]->valid & ma[i]->dirty) { 706 vm_page_protect(ma[i], VM_PROT_READ); 707 vm_page_flag_set(ma[i], PG_CLEANCHK); 708 } 709 } 710 if (object->generation != curgeneration) 711 goto rescan; 712 } 713 714 #if 0 715 VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); 716 #endif 717 718 vm_object_clear_flag(object, OBJ_CLEANING); 719 return; 720 } 721 722 #ifdef not_used 723 /* XXX I cannot tell if this should be an exported symbol */ 724 /* 725 * vm_object_deactivate_pages 726 * 727 * Deactivate all pages in the specified object. 
(Keep its pages 728 * in memory even though it is no longer referenced.) 729 * 730 * The object must be locked. 731 */ 732 static void 733 vm_object_deactivate_pages(object) 734 vm_object_t object; 735 { 736 vm_page_t p, next; 737 738 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { 739 next = TAILQ_NEXT(p, listq); 740 vm_page_deactivate(p); 741 } 742 } 743 #endif 744 745 /* 746 * Same as vm_object_pmap_copy, except range checking really 747 * works, and is meant for small sections of an object. 748 * 749 * This code protects resident pages by making them read-only 750 * and is typically called on a fork or split when a page 751 * is converted to copy-on-write. 752 * 753 * NOTE: If the page is already at VM_PROT_NONE, calling 754 * vm_page_protect will have no effect. 755 */ 756 757 void 758 vm_object_pmap_copy_1(object, start, end) 759 vm_object_t object; 760 vm_pindex_t start; 761 vm_pindex_t end; 762 { 763 vm_pindex_t idx; 764 vm_page_t p; 765 766 mtx_assert(&vm_mtx, MA_OWNED); 767 if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) 768 return; 769 770 for (idx = start; idx < end; idx++) { 771 p = vm_page_lookup(object, idx); 772 if (p == NULL) 773 continue; 774 vm_page_protect(p, VM_PROT_READ); 775 } 776 } 777 778 /* 779 * vm_object_pmap_remove: 780 * 781 * Removes all physical pages in the specified 782 * object range from all physical maps. 783 * 784 * The object must *not* be locked. 785 */ 786 void 787 vm_object_pmap_remove(object, start, end) 788 vm_object_t object; 789 vm_pindex_t start; 790 vm_pindex_t end; 791 { 792 vm_page_t p; 793 794 mtx_assert(&vm_mtx, MA_OWNED); 795 if (object == NULL) 796 return; 797 TAILQ_FOREACH(p, &object->memq, listq) { 798 if (p->pindex >= start && p->pindex < end) 799 vm_page_protect(p, VM_PROT_NONE); 800 } 801 if ((start == 0) && (object->size == end)) 802 vm_object_clear_flag(object, OBJ_WRITEABLE); 803 } 804 805 /* 806 * vm_object_madvise: 807 * 808 * Implements the madvise function at the object/page level. 809 * 810 * MADV_WILLNEED (any object) 811 * 812 * Activate the specified pages if they are resident. 813 * 814 * MADV_DONTNEED (any object) 815 * 816 * Deactivate the specified pages if they are resident. 817 * 818 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, 819 * OBJ_ONEMAPPING only) 820 * 821 * Deactivate and clean the specified pages if they are 822 * resident. This permits the process to reuse the pages 823 * without faulting or the kernel to reclaim the pages 824 * without I/O. 825 */ 826 void 827 vm_object_madvise(object, pindex, count, advise) 828 vm_object_t object; 829 vm_pindex_t pindex; 830 int count; 831 int advise; 832 { 833 vm_pindex_t end, tpindex; 834 vm_object_t tobject; 835 vm_page_t m; 836 837 mtx_assert(&vm_mtx, MA_OWNED); 838 if (object == NULL) 839 return; 840 841 end = pindex + count; 842 843 /* 844 * Locate and adjust resident pages 845 */ 846 847 for (; pindex < end; pindex += 1) { 848 relookup: 849 tobject = object; 850 tpindex = pindex; 851 shadowlookup: 852 /* 853 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages 854 * and those pages must be OBJ_ONEMAPPING. 
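		 *
		 * (Illustrative only: this is the case reached when a process
		 * calls something like
		 *
		 *	madvise(addr, len, MADV_FREE);
		 *
		 * on private anonymous memory, which is what yields a
		 * default/swap object with OBJ_ONEMAPPING set.)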
	 */
	if (advise == MADV_FREE) {
		if ((tobject->type != OBJT_DEFAULT &&
		     tobject->type != OBJT_SWAP) ||
		    (tobject->flags & OBJ_ONEMAPPING) == 0) {
			continue;
		}
	}

	m = vm_page_lookup(tobject, tpindex);

	if (m == NULL) {
		/*
		 * There may be swap even if there is no backing page
		 */
		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
			swap_pager_freespace(tobject, tpindex, 1);

		/*
		 * next object
		 */
		tobject = tobject->backing_object;
		if (tobject == NULL)
			continue;
		tpindex += OFF_TO_IDX(tobject->backing_object_offset);
		goto shadowlookup;
	}

	/*
	 * If the page is busy or not in a normal active state,
	 * we skip it.  If the page is not managed there are no
	 * page queues to mess with.  Things can break if we mess
	 * with pages in any of the below states.
	 */
	if (
	    m->hold_count ||
	    m->wire_count ||
	    (m->flags & PG_UNMANAGED) ||
	    m->valid != VM_PAGE_BITS_ALL
	) {
		continue;
	}

	if (vm_page_sleep_busy(m, TRUE, "madvpo"))
		goto relookup;

	if (advise == MADV_WILLNEED) {
		vm_page_activate(m);
	} else if (advise == MADV_DONTNEED) {
		vm_page_dontneed(m);
	} else if (advise == MADV_FREE) {
		/*
		 * Mark the page clean.  This will allow the page
		 * to be freed up by the system.  However, such pages
		 * are often reused quickly by malloc()/free()
		 * so we do not do anything that would cause
		 * a page fault if we can help it.
		 *
		 * Specifically, we do not try to actually free
		 * the page now nor do we try to put it in the
		 * cache (which would cause a page fault on reuse).
		 *
		 * But we do make the page as freeable as we
		 * can without actually taking the step of unmapping
		 * it.
		 */
		pmap_clear_modify(m);
		m->dirty = 0;
		m->act_count = 0;
		vm_page_dontneed(m);
		if (tobject->type == OBJT_SWAP)
			swap_pager_freespace(tobject, tpindex, 1);
	}
    }
}

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */

void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_ooffset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	vm_object_t source;
	vm_object_t result;

	mtx_assert(&vm_mtx, MA_OWNED);
	source = *object;

	/*
	 * Don't create the new object if the old object isn't shared.
	 */

	if (source != NULL &&
	    source->ref_count == 1 &&
	    source->handle == NULL &&
	    (source->type == OBJT_DEFAULT ||
	     source->type == OBJT_SWAP))
		return;

	/*
	 * Allocate a new object with the given length
	 */
	result = vm_object_allocate(OBJT_DEFAULT, length);
	KASSERT(result != NULL, ("vm_object_shadow: no object for shadowing"));

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 *
	 * Try to optimize the result object's page color when shadowing
	 * in order to maintain page coloring consistency in the combined
	 * shadowed object.
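	 *
	 * (Hypothetical numbers for illustration: if the source object has
	 * pg_color 17 and *offset corresponds to 3 pages, the computation
	 * below gives the shadow a starting color of (17 + 3) & PQ_L2_MASK,
	 * keeping its coloring aligned with the pages it shadows.)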
980 */ 981 result->backing_object = source; 982 if (source) { 983 TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list); 984 source->shadow_count++; 985 source->generation++; 986 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK; 987 } 988 989 /* 990 * Store the offset into the source object, and fix up the offset into 991 * the new object. 992 */ 993 994 result->backing_object_offset = *offset; 995 996 /* 997 * Return the new things 998 */ 999 1000 *offset = 0; 1001 *object = result; 1002 } 1003 1004 #define OBSC_TEST_ALL_SHADOWED 0x0001 1005 #define OBSC_COLLAPSE_NOWAIT 0x0002 1006 #define OBSC_COLLAPSE_WAIT 0x0004 1007 1008 static __inline int 1009 vm_object_backing_scan(vm_object_t object, int op) 1010 { 1011 int s; 1012 int r = 1; 1013 vm_page_t p; 1014 vm_object_t backing_object; 1015 vm_pindex_t backing_offset_index; 1016 1017 s = splvm(); 1018 mtx_assert(&vm_mtx, MA_OWNED); 1019 1020 backing_object = object->backing_object; 1021 backing_offset_index = OFF_TO_IDX(object->backing_object_offset); 1022 1023 /* 1024 * Initial conditions 1025 */ 1026 1027 if (op & OBSC_TEST_ALL_SHADOWED) { 1028 /* 1029 * We do not want to have to test for the existence of 1030 * swap pages in the backing object. XXX but with the 1031 * new swapper this would be pretty easy to do. 1032 * 1033 * XXX what about anonymous MAP_SHARED memory that hasn't 1034 * been ZFOD faulted yet? If we do not test for this, the 1035 * shadow test may succeed! XXX 1036 */ 1037 if (backing_object->type != OBJT_DEFAULT) { 1038 splx(s); 1039 return(0); 1040 } 1041 } 1042 if (op & OBSC_COLLAPSE_WAIT) { 1043 vm_object_set_flag(backing_object, OBJ_DEAD); 1044 } 1045 1046 /* 1047 * Our scan 1048 */ 1049 1050 p = TAILQ_FIRST(&backing_object->memq); 1051 while (p) { 1052 vm_page_t next = TAILQ_NEXT(p, listq); 1053 vm_pindex_t new_pindex = p->pindex - backing_offset_index; 1054 1055 if (op & OBSC_TEST_ALL_SHADOWED) { 1056 vm_page_t pp; 1057 1058 /* 1059 * Ignore pages outside the parent object's range 1060 * and outside the parent object's mapping of the 1061 * backing object. 1062 * 1063 * note that we do not busy the backing object's 1064 * page. 1065 */ 1066 1067 if ( 1068 p->pindex < backing_offset_index || 1069 new_pindex >= object->size 1070 ) { 1071 p = next; 1072 continue; 1073 } 1074 1075 /* 1076 * See if the parent has the page or if the parent's 1077 * object pager has the page. If the parent has the 1078 * page but the page is not valid, the parent's 1079 * object pager must have the page. 1080 * 1081 * If this fails, the parent does not completely shadow 1082 * the object and we might as well give up now. 1083 */ 1084 1085 pp = vm_page_lookup(object, new_pindex); 1086 if ( 1087 (pp == NULL || pp->valid == 0) && 1088 !vm_pager_has_page(object, new_pindex, NULL, NULL) 1089 ) { 1090 r = 0; 1091 break; 1092 } 1093 } 1094 1095 /* 1096 * Check for busy page 1097 */ 1098 1099 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { 1100 vm_page_t pp; 1101 1102 if (op & OBSC_COLLAPSE_NOWAIT) { 1103 if ( 1104 (p->flags & PG_BUSY) || 1105 !p->valid || 1106 p->hold_count || 1107 p->wire_count || 1108 p->busy 1109 ) { 1110 p = next; 1111 continue; 1112 } 1113 } else if (op & OBSC_COLLAPSE_WAIT) { 1114 if (vm_page_sleep_busy(p, TRUE, "vmocol")) { 1115 /* 1116 * If we slept, anything could have 1117 * happened. Since the object is 1118 * marked dead, the backing offset 1119 * should not have changed so we 1120 * just restart our scan. 
1121 */ 1122 p = TAILQ_FIRST(&backing_object->memq); 1123 continue; 1124 } 1125 } 1126 1127 /* 1128 * Busy the page 1129 */ 1130 vm_page_busy(p); 1131 1132 KASSERT( 1133 p->object == backing_object, 1134 ("vm_object_qcollapse(): object mismatch") 1135 ); 1136 1137 /* 1138 * Destroy any associated swap 1139 */ 1140 if (backing_object->type == OBJT_SWAP) { 1141 swap_pager_freespace( 1142 backing_object, 1143 p->pindex, 1144 1 1145 ); 1146 } 1147 1148 if ( 1149 p->pindex < backing_offset_index || 1150 new_pindex >= object->size 1151 ) { 1152 /* 1153 * Page is out of the parent object's range, we 1154 * can simply destroy it. 1155 */ 1156 vm_page_protect(p, VM_PROT_NONE); 1157 vm_page_free(p); 1158 p = next; 1159 continue; 1160 } 1161 1162 pp = vm_page_lookup(object, new_pindex); 1163 if ( 1164 pp != NULL || 1165 vm_pager_has_page(object, new_pindex, NULL, NULL) 1166 ) { 1167 /* 1168 * page already exists in parent OR swap exists 1169 * for this location in the parent. Destroy 1170 * the original page from the backing object. 1171 * 1172 * Leave the parent's page alone 1173 */ 1174 vm_page_protect(p, VM_PROT_NONE); 1175 vm_page_free(p); 1176 p = next; 1177 continue; 1178 } 1179 1180 /* 1181 * Page does not exist in parent, rename the 1182 * page from the backing object to the main object. 1183 * 1184 * If the page was mapped to a process, it can remain 1185 * mapped through the rename. 1186 */ 1187 if ((p->queue - p->pc) == PQ_CACHE) 1188 vm_page_deactivate(p); 1189 1190 vm_page_rename(p, object, new_pindex); 1191 /* page automatically made dirty by rename */ 1192 } 1193 p = next; 1194 } 1195 splx(s); 1196 return(r); 1197 } 1198 1199 1200 /* 1201 * this version of collapse allows the operation to occur earlier and 1202 * when paging_in_progress is true for an object... This is not a complete 1203 * operation, but should plug 99.9% of the rest of the leaks. 1204 */ 1205 static void 1206 vm_object_qcollapse(object) 1207 vm_object_t object; 1208 { 1209 vm_object_t backing_object = object->backing_object; 1210 1211 if (backing_object->ref_count != 1) 1212 return; 1213 1214 backing_object->ref_count += 2; 1215 1216 vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT); 1217 1218 backing_object->ref_count -= 2; 1219 } 1220 1221 /* 1222 * vm_object_collapse: 1223 * 1224 * Collapse an object with the object backing it. 1225 * Pages in the backing object are moved into the 1226 * parent, and the backing object is deallocated. 1227 */ 1228 void 1229 vm_object_collapse(object) 1230 vm_object_t object; 1231 { 1232 1233 mtx_assert(&vm_mtx, MA_OWNED); 1234 1235 while (TRUE) { 1236 vm_object_t backing_object; 1237 1238 /* 1239 * Verify that the conditions are right for collapse: 1240 * 1241 * The object exists and the backing object exists. 1242 */ 1243 if (object == NULL) 1244 break; 1245 1246 if ((backing_object = object->backing_object) == NULL) 1247 break; 1248 1249 /* 1250 * we check the backing object first, because it is most likely 1251 * not collapsable. 
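		 *
		 * (One common way such a chain arises -- an illustrative
		 * sketch of the usual copy-on-write setup, not anything
		 * specific to this function: a mapping gets a fresh shadow
		 * object via vm_object_shadow(), giving
		 *
		 *	object -> backing_object -> ...
		 *
		 * and once the other references to backing_object go away
		 * its ref_count reaches 1, which is the collapsible case
		 * handled below.)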
1252 */ 1253 if (backing_object->handle != NULL || 1254 (backing_object->type != OBJT_DEFAULT && 1255 backing_object->type != OBJT_SWAP) || 1256 (backing_object->flags & OBJ_DEAD) || 1257 object->handle != NULL || 1258 (object->type != OBJT_DEFAULT && 1259 object->type != OBJT_SWAP) || 1260 (object->flags & OBJ_DEAD)) { 1261 break; 1262 } 1263 1264 if ( 1265 object->paging_in_progress != 0 || 1266 backing_object->paging_in_progress != 0 1267 ) { 1268 vm_object_qcollapse(object); 1269 break; 1270 } 1271 1272 /* 1273 * We know that we can either collapse the backing object (if 1274 * the parent is the only reference to it) or (perhaps) have 1275 * the parent bypass the object if the parent happens to shadow 1276 * all the resident pages in the entire backing object. 1277 * 1278 * This is ignoring pager-backed pages such as swap pages. 1279 * vm_object_backing_scan fails the shadowing test in this 1280 * case. 1281 */ 1282 1283 if (backing_object->ref_count == 1) { 1284 /* 1285 * If there is exactly one reference to the backing 1286 * object, we can collapse it into the parent. 1287 */ 1288 1289 vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT); 1290 1291 /* 1292 * Move the pager from backing_object to object. 1293 */ 1294 1295 if (backing_object->type == OBJT_SWAP) { 1296 vm_object_pip_add(backing_object, 1); 1297 1298 /* 1299 * scrap the paging_offset junk and do a 1300 * discrete copy. This also removes major 1301 * assumptions about how the swap-pager 1302 * works from where it doesn't belong. The 1303 * new swapper is able to optimize the 1304 * destroy-source case. 1305 */ 1306 1307 vm_object_pip_add(object, 1); 1308 swap_pager_copy( 1309 backing_object, 1310 object, 1311 OFF_TO_IDX(object->backing_object_offset), TRUE); 1312 vm_object_pip_wakeup(object); 1313 1314 vm_object_pip_wakeup(backing_object); 1315 } 1316 /* 1317 * Object now shadows whatever backing_object did. 1318 * Note that the reference to 1319 * backing_object->backing_object moves from within 1320 * backing_object to within object. 1321 */ 1322 1323 TAILQ_REMOVE( 1324 &object->backing_object->shadow_head, 1325 object, 1326 shadow_list 1327 ); 1328 object->backing_object->shadow_count--; 1329 object->backing_object->generation++; 1330 if (backing_object->backing_object) { 1331 TAILQ_REMOVE( 1332 &backing_object->backing_object->shadow_head, 1333 backing_object, 1334 shadow_list 1335 ); 1336 backing_object->backing_object->shadow_count--; 1337 backing_object->backing_object->generation++; 1338 } 1339 object->backing_object = backing_object->backing_object; 1340 if (object->backing_object) { 1341 TAILQ_INSERT_TAIL( 1342 &object->backing_object->shadow_head, 1343 object, 1344 shadow_list 1345 ); 1346 object->backing_object->shadow_count++; 1347 object->backing_object->generation++; 1348 } 1349 1350 object->backing_object_offset += 1351 backing_object->backing_object_offset; 1352 1353 /* 1354 * Discard backing_object. 1355 * 1356 * Since the backing object has no pages, no pager left, 1357 * and no object references within it, all that is 1358 * necessary is to dispose of it. 1359 */ 1360 1361 TAILQ_REMOVE( 1362 &vm_object_list, 1363 backing_object, 1364 object_list 1365 ); 1366 vm_object_count--; 1367 1368 zfree(obj_zone, backing_object); 1369 1370 object_collapses++; 1371 } else { 1372 vm_object_t new_backing_object; 1373 1374 /* 1375 * If we do not entirely shadow the backing object, 1376 * there is nothing we can do so we give up. 
1377 */ 1378 1379 if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) { 1380 break; 1381 } 1382 1383 /* 1384 * Make the parent shadow the next object in the 1385 * chain. Deallocating backing_object will not remove 1386 * it, since its reference count is at least 2. 1387 */ 1388 1389 TAILQ_REMOVE( 1390 &backing_object->shadow_head, 1391 object, 1392 shadow_list 1393 ); 1394 backing_object->shadow_count--; 1395 backing_object->generation++; 1396 1397 new_backing_object = backing_object->backing_object; 1398 if ((object->backing_object = new_backing_object) != NULL) { 1399 vm_object_reference(new_backing_object); 1400 TAILQ_INSERT_TAIL( 1401 &new_backing_object->shadow_head, 1402 object, 1403 shadow_list 1404 ); 1405 new_backing_object->shadow_count++; 1406 new_backing_object->generation++; 1407 object->backing_object_offset += 1408 backing_object->backing_object_offset; 1409 } 1410 1411 /* 1412 * Drop the reference count on backing_object. Since 1413 * its ref_count was at least 2, it will not vanish; 1414 * so we don't need to call vm_object_deallocate, but 1415 * we do anyway. 1416 */ 1417 vm_object_deallocate(backing_object); 1418 object_bypasses++; 1419 } 1420 1421 /* 1422 * Try again with this object's new backing object. 1423 */ 1424 } 1425 } 1426 1427 /* 1428 * vm_object_page_remove: [internal] 1429 * 1430 * Removes all physical pages in the specified 1431 * object range from the object's list of pages. 1432 * 1433 * The object must be locked. 1434 */ 1435 void 1436 vm_object_page_remove(object, start, end, clean_only) 1437 vm_object_t object; 1438 vm_pindex_t start; 1439 vm_pindex_t end; 1440 boolean_t clean_only; 1441 { 1442 vm_page_t p, next; 1443 unsigned int size; 1444 int all; 1445 1446 mtx_assert(&vm_mtx, MA_OWNED); 1447 1448 if (object == NULL || 1449 object->resident_page_count == 0) 1450 return; 1451 1452 all = ((end == 0) && (start == 0)); 1453 1454 /* 1455 * Since physically-backed objects do not use managed pages, we can't 1456 * remove pages from the object (we must instead remove the page 1457 * references, and then destroy the object). 
	 */
	KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));

	vm_object_pip_add(object, 1);
again:
	size = end - start;
	if (all || size > object->resident_page_count / 4) {
		for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
			next = TAILQ_NEXT(p, listq);
			if (all || ((start <= p->pindex) && (p->pindex < end))) {
				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */

				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			if ((p = vm_page_lookup(object, start)) != 0) {

				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					start += 1;
					size -= 1;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */
				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty) {
						start += 1;
						size -= 1;
						continue;
					}
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
			start += 1;
			size -= 1;
		}
	}
	vm_object_pip_wakeup(object);
}

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
	vm_object_t prev_object;
	vm_pindex_t prev_pindex;
	vm_size_t prev_size, next_size;
{
	vm_pindex_t next_pindex;

	mtx_assert(&vm_mtx, MA_OWNED);

	if (prev_object == NULL) {
		return (TRUE);
	}

	if (prev_object->type != OBJT_DEFAULT &&
	    prev_object->type != OBJT_SWAP) {
		return (FALSE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if: . more than one reference . paged out . shadows
	 * another object .
has a copy elsewhere (any of which mean that the 1583 * pages not mapped to prev_entry may be in use anyway) 1584 */ 1585 1586 if (prev_object->backing_object != NULL) { 1587 return (FALSE); 1588 } 1589 1590 prev_size >>= PAGE_SHIFT; 1591 next_size >>= PAGE_SHIFT; 1592 next_pindex = prev_pindex + prev_size; 1593 1594 if ((prev_object->ref_count > 1) && 1595 (prev_object->size != next_pindex)) { 1596 return (FALSE); 1597 } 1598 1599 /* 1600 * Remove any pages that may still be in the object from a previous 1601 * deallocation. 1602 */ 1603 if (next_pindex < prev_object->size) { 1604 vm_object_page_remove(prev_object, 1605 next_pindex, 1606 next_pindex + next_size, FALSE); 1607 if (prev_object->type == OBJT_SWAP) 1608 swap_pager_freespace(prev_object, 1609 next_pindex, next_size); 1610 } 1611 1612 /* 1613 * Extend the object if necessary. 1614 */ 1615 if (next_pindex + next_size > prev_object->size) 1616 prev_object->size = next_pindex + next_size; 1617 1618 return (TRUE); 1619 } 1620 1621 #include "opt_ddb.h" 1622 #ifdef DDB 1623 #include <sys/kernel.h> 1624 1625 #include <sys/cons.h> 1626 1627 #include <ddb/ddb.h> 1628 1629 static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, 1630 vm_map_entry_t entry)); 1631 static int vm_object_in_map __P((vm_object_t object)); 1632 1633 static int 1634 _vm_object_in_map(map, object, entry) 1635 vm_map_t map; 1636 vm_object_t object; 1637 vm_map_entry_t entry; 1638 { 1639 vm_map_t tmpm; 1640 vm_map_entry_t tmpe; 1641 vm_object_t obj; 1642 int entcount; 1643 1644 if (map == 0) 1645 return 0; 1646 1647 if (entry == 0) { 1648 tmpe = map->header.next; 1649 entcount = map->nentries; 1650 while (entcount-- && (tmpe != &map->header)) { 1651 if( _vm_object_in_map(map, object, tmpe)) { 1652 return 1; 1653 } 1654 tmpe = tmpe->next; 1655 } 1656 } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 1657 tmpm = entry->object.sub_map; 1658 tmpe = tmpm->header.next; 1659 entcount = tmpm->nentries; 1660 while (entcount-- && tmpe != &tmpm->header) { 1661 if( _vm_object_in_map(tmpm, object, tmpe)) { 1662 return 1; 1663 } 1664 tmpe = tmpe->next; 1665 } 1666 } else if ((obj = entry->object.vm_object) != NULL) { 1667 for(; obj; obj=obj->backing_object) 1668 if( obj == object) { 1669 return 1; 1670 } 1671 } 1672 return 0; 1673 } 1674 1675 static int 1676 vm_object_in_map( object) 1677 vm_object_t object; 1678 { 1679 struct proc *p; 1680 1681 /* sx_slock(&allproc_lock); */ 1682 LIST_FOREACH(p, &allproc, p_list) { 1683 if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) 1684 continue; 1685 if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { 1686 /* sx_sunlock(&allproc_lock); */ 1687 return 1; 1688 } 1689 } 1690 /* sx_sunlock(&allproc_lock); */ 1691 if( _vm_object_in_map( kernel_map, object, 0)) 1692 return 1; 1693 if( _vm_object_in_map( kmem_map, object, 0)) 1694 return 1; 1695 if( _vm_object_in_map( pager_map, object, 0)) 1696 return 1; 1697 if( _vm_object_in_map( buffer_map, object, 0)) 1698 return 1; 1699 return 0; 1700 } 1701 1702 DB_SHOW_COMMAND(vmochk, vm_object_check) 1703 { 1704 vm_object_t object; 1705 1706 /* 1707 * make sure that internal objs are in a map somewhere 1708 * and none have zero ref counts. 
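	 *
	 * (Usage note, assuming the standard DDB "show" dispatch used by
	 * DB_SHOW_COMMAND: run this from the debugger prompt as
	 *
	 *	db> show vmochk
	 *
	 * to walk every object on vm_object_list.)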
1709 */ 1710 TAILQ_FOREACH(object, &vm_object_list, object_list) { 1711 if (object->handle == NULL && 1712 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 1713 if (object->ref_count == 0) { 1714 db_printf("vmochk: internal obj has zero ref count: %ld\n", 1715 (long)object->size); 1716 } 1717 if (!vm_object_in_map(object)) { 1718 db_printf( 1719 "vmochk: internal obj is not in a map: " 1720 "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", 1721 object->ref_count, (u_long)object->size, 1722 (u_long)object->size, 1723 (void *)object->backing_object); 1724 } 1725 } 1726 } 1727 } 1728 1729 /* 1730 * vm_object_print: [ debug ] 1731 */ 1732 DB_SHOW_COMMAND(object, vm_object_print_static) 1733 { 1734 /* XXX convert args. */ 1735 vm_object_t object = (vm_object_t)addr; 1736 boolean_t full = have_addr; 1737 1738 vm_page_t p; 1739 1740 /* XXX count is an (unused) arg. Avoid shadowing it. */ 1741 #define count was_count 1742 1743 int count; 1744 1745 if (object == NULL) 1746 return; 1747 1748 db_iprintf( 1749 "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n", 1750 object, (int)object->type, (u_long)object->size, 1751 object->resident_page_count, object->ref_count, object->flags); 1752 /* 1753 * XXX no %qd in kernel. Truncate object->backing_object_offset. 1754 */ 1755 db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n", 1756 object->shadow_count, 1757 object->backing_object ? object->backing_object->ref_count : 0, 1758 object->backing_object, (long)object->backing_object_offset); 1759 1760 if (!full) 1761 return; 1762 1763 db_indent += 2; 1764 count = 0; 1765 TAILQ_FOREACH(p, &object->memq, listq) { 1766 if (count == 0) 1767 db_iprintf("memory:="); 1768 else if (count == 6) { 1769 db_printf("\n"); 1770 db_iprintf(" ..."); 1771 count = 0; 1772 } else 1773 db_printf(","); 1774 count++; 1775 1776 db_printf("(off=0x%lx,page=0x%lx)", 1777 (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); 1778 } 1779 if (count != 0) 1780 db_printf("\n"); 1781 db_indent -= 2; 1782 } 1783 1784 /* XXX. */ 1785 #undef count 1786 1787 /* XXX need this non-static entry for calling from vm_map_print. 
*/ 1788 void 1789 vm_object_print(addr, have_addr, count, modif) 1790 /* db_expr_t */ long addr; 1791 boolean_t have_addr; 1792 /* db_expr_t */ long count; 1793 char *modif; 1794 { 1795 vm_object_print_static(addr, have_addr, count, modif); 1796 } 1797 1798 DB_SHOW_COMMAND(vmopag, vm_object_print_pages) 1799 { 1800 vm_object_t object; 1801 int nl = 0; 1802 int c; 1803 1804 TAILQ_FOREACH(object, &vm_object_list, object_list) { 1805 vm_pindex_t idx, fidx; 1806 vm_pindex_t osize; 1807 vm_offset_t pa = -1, padiff; 1808 int rcount; 1809 vm_page_t m; 1810 1811 db_printf("new object: %p\n", (void *)object); 1812 if ( nl > 18) { 1813 c = cngetc(); 1814 if (c != ' ') 1815 return; 1816 nl = 0; 1817 } 1818 nl++; 1819 rcount = 0; 1820 fidx = 0; 1821 osize = object->size; 1822 if (osize > 128) 1823 osize = 128; 1824 for(idx=0;idx<osize;idx++) { 1825 m = vm_page_lookup(object, idx); 1826 if (m == NULL) { 1827 if (rcount) { 1828 db_printf(" index(%ld)run(%d)pa(0x%lx)\n", 1829 (long)fidx, rcount, (long)pa); 1830 if ( nl > 18) { 1831 c = cngetc(); 1832 if (c != ' ') 1833 return; 1834 nl = 0; 1835 } 1836 nl++; 1837 rcount = 0; 1838 } 1839 continue; 1840 } 1841 1842 1843 if (rcount && 1844 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { 1845 ++rcount; 1846 continue; 1847 } 1848 if (rcount) { 1849 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m); 1850 padiff >>= PAGE_SHIFT; 1851 padiff &= PQ_L2_MASK; 1852 if (padiff == 0) { 1853 pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE; 1854 ++rcount; 1855 continue; 1856 } 1857 db_printf(" index(%ld)run(%d)pa(0x%lx)", 1858 (long)fidx, rcount, (long)pa); 1859 db_printf("pd(%ld)\n", (long)padiff); 1860 if ( nl > 18) { 1861 c = cngetc(); 1862 if (c != ' ') 1863 return; 1864 nl = 0; 1865 } 1866 nl++; 1867 } 1868 fidx = idx; 1869 pa = VM_PAGE_TO_PHYS(m); 1870 rcount = 1; 1871 } 1872 if (rcount) { 1873 db_printf(" index(%ld)run(%d)pa(0x%lx)\n", 1874 (long)fidx, rcount, (long)pa); 1875 if ( nl > 18) { 1876 c = cngetc(); 1877 if (c != ' ') 1878 return; 1879 nl = 0; 1880 } 1881 nl++; 1882 } 1883 } 1884 } 1885 #endif /* DDB */ 1886