1 /*- 2 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) 3 * 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * The Mach Operating System project at Carnegie-Mellon University. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 35 * 36 * 37 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 38 * All rights reserved. 39 * 40 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 41 * 42 * Permission to use, copy, modify and distribute this software and 43 * its documentation is hereby granted, provided that both the copyright 44 * notice and this permission notice appear in all copies of the 45 * software, derivative works or modified versions, and any portions 46 * thereof, and that both notices appear in supporting documentation. 47 * 48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51 * 52 * Carnegie Mellon requests users of this software to return to 53 * 54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55 * School of Computer Science 56 * Carnegie Mellon University 57 * Pittsburgh PA 15213-3890 58 * 59 * any improvements or extensions that they make and grant Carnegie the 60 * rights to redistribute these changes. 61 */ 62 63 /* 64 * Virtual memory object module. 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include "opt_vm.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/cpuset.h> 75 #include <sys/lock.h> 76 #include <sys/mman.h> 77 #include <sys/mount.h> 78 #include <sys/kernel.h> 79 #include <sys/pctrie.h> 80 #include <sys/sysctl.h> 81 #include <sys/mutex.h> 82 #include <sys/proc.h> /* for curproc, pageproc */ 83 #include <sys/socket.h> 84 #include <sys/resourcevar.h> 85 #include <sys/rwlock.h> 86 #include <sys/user.h> 87 #include <sys/vnode.h> 88 #include <sys/vmmeter.h> 89 #include <sys/sx.h> 90 91 #include <vm/vm.h> 92 #include <vm/vm_param.h> 93 #include <vm/pmap.h> 94 #include <vm/vm_map.h> 95 #include <vm/vm_object.h> 96 #include <vm/vm_page.h> 97 #include <vm/vm_pageout.h> 98 #include <vm/vm_pager.h> 99 #include <vm/swap_pager.h> 100 #include <vm/vm_kern.h> 101 #include <vm/vm_extern.h> 102 #include <vm/vm_radix.h> 103 #include <vm/vm_reserv.h> 104 #include <vm/uma.h> 105 106 static int old_msync; 107 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, 108 "Use old (insecure) msync behavior"); 109 110 static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, 111 int pagerflags, int flags, boolean_t *clearobjflags, 112 boolean_t *eio); 113 static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, 114 boolean_t *clearobjflags); 115 static void vm_object_qcollapse(vm_object_t object); 116 static void vm_object_vndeallocate(vm_object_t object); 117 118 /* 119 * Virtual memory objects maintain the actual data 120 * associated with allocated virtual memory. A given 121 * page of memory exists within exactly one object. 122 * 123 * An object is only deallocated when all "references" 124 * are given up. Only one "reference" to a given 125 * region of an object should be writeable. 126 * 127 * Associated with each object is a list of all resident 128 * memory pages belonging to that object; this list is 129 * maintained by the "vm_page" module, and locked by the object's 130 * lock. 131 * 132 * Each object also records a "pager" routine which is 133 * used to retrieve (and store) pages to the proper backing 134 * storage. In addition, objects may be backed by other 135 * objects from which they were virtual-copied. 136 * 137 * The only items within the object structure which are 138 * modified after time of creation are: 139 * reference count locked by object's lock 140 * pager routine locked by object's lock 141 * 142 */ 143 144 struct object_q vm_object_list; 145 struct mtx vm_object_list_mtx; /* lock for object list and count */ 146 147 struct vm_object kernel_object_store; 148 149 static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, 150 "VM object stats"); 151 152 static counter_u64_t object_collapses = EARLY_COUNTER; 153 SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, 154 &object_collapses, 155 "VM object collapses"); 156 157 static counter_u64_t object_bypasses = EARLY_COUNTER; 158 SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, 159 &object_bypasses, 160 "VM object bypasses"); 161 162 static void 163 counter_startup(void) 164 { 165 166 object_collapses = counter_u64_alloc(M_WAITOK); 167 object_bypasses = counter_u64_alloc(M_WAITOK); 168 } 169 SYSINIT(object_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); 170 171 static uma_zone_t obj_zone; 172 173 static int vm_object_zinit(void *mem, int size, int flags); 174 175 #ifdef INVARIANTS 176 static void vm_object_zdtor(void *mem, int size, void *arg); 177 178 static void 179 vm_object_zdtor(void *mem, int size, void *arg) 180 { 181 vm_object_t object; 182 183 object = (vm_object_t)mem; 184 KASSERT(object->ref_count == 0, 185 ("object %p ref_count = %d", object, object->ref_count)); 186 KASSERT(TAILQ_EMPTY(&object->memq), 187 ("object %p has resident pages in its memq", object)); 188 KASSERT(vm_radix_is_empty(&object->rtree), 189 ("object %p has resident pages in its trie", object)); 190 #if VM_NRESERVLEVEL > 0 191 KASSERT(LIST_EMPTY(&object->rvq), 192 ("object %p has reservations", 193 object)); 194 #endif 195 KASSERT(object->paging_in_progress == 0, 196 ("object %p paging_in_progress = %d", 197 object, object->paging_in_progress)); 198 KASSERT(object->resident_page_count == 0, 199 ("object %p resident_page_count = %d", 200 object, object->resident_page_count)); 201 KASSERT(object->shadow_count == 0, 202 ("object %p shadow_count = %d", 203 object, object->shadow_count)); 204 KASSERT(object->type == OBJT_DEAD, 205 ("object %p has non-dead type %d", 206 object, object->type)); 207 } 208 #endif 209 210 static int 211 vm_object_zinit(void *mem, int size, int flags) 212 { 213 vm_object_t object; 214 215 object = (vm_object_t)mem; 216 rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW); 217 218 /* These are true for any object that has been freed */ 219 object->type = OBJT_DEAD; 220 object->ref_count = 0; 221 vm_radix_init(&object->rtree); 222 object->paging_in_progress = 0; 223 object->resident_page_count = 0; 224 object->shadow_count = 0; 225 object->flags = OBJ_DEAD; 226 227 mtx_lock(&vm_object_list_mtx); 228 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); 229 mtx_unlock(&vm_object_list_mtx); 230 return (0); 231 } 232 233 static void 234 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) 235 { 236 237 TAILQ_INIT(&object->memq); 238 LIST_INIT(&object->shadow_head); 239 240 object->type = type; 241 if (type == OBJT_SWAP) 242 pctrie_init(&object->un_pager.swp.swp_blks); 243 244 /* 245 * Ensure that swap_pager_swapoff() iteration over object_list 246 * sees up to date type and pctrie head if it observed 247 * non-dead object. 248 */ 249 atomic_thread_fence_rel(); 250 251 switch (type) { 252 case OBJT_DEAD: 253 panic("_vm_object_allocate: can't create OBJT_DEAD"); 254 case OBJT_DEFAULT: 255 case OBJT_SWAP: 256 object->flags = OBJ_ONEMAPPING; 257 break; 258 case OBJT_DEVICE: 259 case OBJT_SG: 260 object->flags = OBJ_FICTITIOUS | OBJ_UNMANAGED; 261 break; 262 case OBJT_MGTDEVICE: 263 object->flags = OBJ_FICTITIOUS; 264 break; 265 case OBJT_PHYS: 266 object->flags = OBJ_UNMANAGED; 267 break; 268 case OBJT_VNODE: 269 object->flags = 0; 270 break; 271 default: 272 panic("_vm_object_allocate: type %d is undefined", type); 273 } 274 object->size = size; 275 object->generation = 1; 276 object->ref_count = 1; 277 object->memattr = VM_MEMATTR_DEFAULT; 278 object->cred = NULL; 279 object->charge = 0; 280 object->handle = NULL; 281 object->backing_object = NULL; 282 object->backing_object_offset = (vm_ooffset_t) 0; 283 #if VM_NRESERVLEVEL > 0 284 LIST_INIT(&object->rvq); 285 #endif 286 umtx_shm_object_init(object); 287 } 288 289 /* 290 * vm_object_init: 291 * 292 * Initialize the VM objects module. 293 */ 294 void 295 vm_object_init(void) 296 { 297 TAILQ_INIT(&vm_object_list); 298 mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); 299 300 rw_init(&kernel_object->lock, "kernel vm object"); 301 _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - 302 VM_MIN_KERNEL_ADDRESS), kernel_object); 303 #if VM_NRESERVLEVEL > 0 304 kernel_object->flags |= OBJ_COLORED; 305 kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); 306 #endif 307 308 /* 309 * The lock portion of struct vm_object must be type stable due 310 * to vm_pageout_fallback_object_lock locking a vm object 311 * without holding any references to it. 312 */ 313 obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, 314 #ifdef INVARIANTS 315 vm_object_zdtor, 316 #else 317 NULL, 318 #endif 319 vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 320 321 vm_radix_zinit(); 322 } 323 324 void 325 vm_object_clear_flag(vm_object_t object, u_short bits) 326 { 327 328 VM_OBJECT_ASSERT_WLOCKED(object); 329 object->flags &= ~bits; 330 } 331 332 /* 333 * Sets the default memory attribute for the specified object. Pages 334 * that are allocated to this object are by default assigned this memory 335 * attribute. 336 * 337 * Presently, this function must be called before any pages are allocated 338 * to the object. In the future, this requirement may be relaxed for 339 * "default" and "swap" objects. 340 */ 341 int 342 vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) 343 { 344 345 VM_OBJECT_ASSERT_WLOCKED(object); 346 switch (object->type) { 347 case OBJT_DEFAULT: 348 case OBJT_DEVICE: 349 case OBJT_MGTDEVICE: 350 case OBJT_PHYS: 351 case OBJT_SG: 352 case OBJT_SWAP: 353 case OBJT_VNODE: 354 if (!TAILQ_EMPTY(&object->memq)) 355 return (KERN_FAILURE); 356 break; 357 case OBJT_DEAD: 358 return (KERN_INVALID_ARGUMENT); 359 default: 360 panic("vm_object_set_memattr: object %p is of undefined type", 361 object); 362 } 363 object->memattr = memattr; 364 return (KERN_SUCCESS); 365 } 366 367 void 368 vm_object_pip_add(vm_object_t object, short i) 369 { 370 371 VM_OBJECT_ASSERT_WLOCKED(object); 372 object->paging_in_progress += i; 373 } 374 375 void 376 vm_object_pip_subtract(vm_object_t object, short i) 377 { 378 379 VM_OBJECT_ASSERT_WLOCKED(object); 380 object->paging_in_progress -= i; 381 } 382 383 void 384 vm_object_pip_wakeup(vm_object_t object) 385 { 386 387 VM_OBJECT_ASSERT_WLOCKED(object); 388 object->paging_in_progress--; 389 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { 390 vm_object_clear_flag(object, OBJ_PIPWNT); 391 wakeup(object); 392 } 393 } 394 395 void 396 vm_object_pip_wakeupn(vm_object_t object, short i) 397 { 398 399 VM_OBJECT_ASSERT_WLOCKED(object); 400 if (i) 401 object->paging_in_progress -= i; 402 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { 403 vm_object_clear_flag(object, OBJ_PIPWNT); 404 wakeup(object); 405 } 406 } 407 408 void 409 vm_object_pip_wait(vm_object_t object, char *waitid) 410 { 411 412 VM_OBJECT_ASSERT_WLOCKED(object); 413 while (object->paging_in_progress) { 414 object->flags |= OBJ_PIPWNT; 415 VM_OBJECT_SLEEP(object, object, PVM, waitid, 0); 416 } 417 } 418 419 /* 420 * vm_object_allocate: 421 * 422 * Returns a new object with the given size. 423 */ 424 vm_object_t 425 vm_object_allocate(objtype_t type, vm_pindex_t size) 426 { 427 vm_object_t object; 428 429 object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); 430 _vm_object_allocate(type, size, object); 431 return (object); 432 } 433 434 435 /* 436 * vm_object_reference: 437 * 438 * Gets another reference to the given object. Note: OBJ_DEAD 439 * objects can be referenced during final cleaning. 440 */ 441 void 442 vm_object_reference(vm_object_t object) 443 { 444 if (object == NULL) 445 return; 446 VM_OBJECT_WLOCK(object); 447 vm_object_reference_locked(object); 448 VM_OBJECT_WUNLOCK(object); 449 } 450 451 /* 452 * vm_object_reference_locked: 453 * 454 * Gets another reference to the given object. 455 * 456 * The object must be locked. 457 */ 458 void 459 vm_object_reference_locked(vm_object_t object) 460 { 461 struct vnode *vp; 462 463 VM_OBJECT_ASSERT_WLOCKED(object); 464 object->ref_count++; 465 if (object->type == OBJT_VNODE) { 466 vp = object->handle; 467 vref(vp); 468 } 469 } 470 471 /* 472 * Handle deallocating an object of type OBJT_VNODE. 473 */ 474 static void 475 vm_object_vndeallocate(vm_object_t object) 476 { 477 struct vnode *vp = (struct vnode *) object->handle; 478 479 VM_OBJECT_ASSERT_WLOCKED(object); 480 KASSERT(object->type == OBJT_VNODE, 481 ("vm_object_vndeallocate: not a vnode object")); 482 KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); 483 #ifdef INVARIANTS 484 if (object->ref_count == 0) { 485 vn_printf(vp, "vm_object_vndeallocate "); 486 panic("vm_object_vndeallocate: bad object reference count"); 487 } 488 #endif 489 490 if (!umtx_shm_vnobj_persistent && object->ref_count == 1) 491 umtx_shm_object_terminated(object); 492 493 /* 494 * The test for text of vp vnode does not need a bypass to 495 * reach right VV_TEXT there, since it is obtained from 496 * object->handle. 497 */ 498 if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) { 499 object->ref_count--; 500 VM_OBJECT_WUNLOCK(object); 501 /* vrele may need the vnode lock. */ 502 vrele(vp); 503 } else { 504 vhold(vp); 505 VM_OBJECT_WUNLOCK(object); 506 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 507 vdrop(vp); 508 VM_OBJECT_WLOCK(object); 509 object->ref_count--; 510 if (object->type == OBJT_DEAD) { 511 VM_OBJECT_WUNLOCK(object); 512 VOP_UNLOCK(vp, 0); 513 } else { 514 if (object->ref_count == 0) 515 VOP_UNSET_TEXT(vp); 516 VM_OBJECT_WUNLOCK(object); 517 vput(vp); 518 } 519 } 520 } 521 522 /* 523 * vm_object_deallocate: 524 * 525 * Release a reference to the specified object, 526 * gained either through a vm_object_allocate 527 * or a vm_object_reference call. When all references 528 * are gone, storage associated with this object 529 * may be relinquished. 530 * 531 * No object may be locked. 532 */ 533 void 534 vm_object_deallocate(vm_object_t object) 535 { 536 vm_object_t temp; 537 struct vnode *vp; 538 539 while (object != NULL) { 540 VM_OBJECT_WLOCK(object); 541 if (object->type == OBJT_VNODE) { 542 vm_object_vndeallocate(object); 543 return; 544 } 545 546 KASSERT(object->ref_count != 0, 547 ("vm_object_deallocate: object deallocated too many times: %d", object->type)); 548 549 /* 550 * If the reference count goes to 0 we start calling 551 * vm_object_terminate() on the object chain. 552 * A ref count of 1 may be a special case depending on the 553 * shadow count being 0 or 1. 554 */ 555 object->ref_count--; 556 if (object->ref_count > 1) { 557 VM_OBJECT_WUNLOCK(object); 558 return; 559 } else if (object->ref_count == 1) { 560 if (object->type == OBJT_SWAP && 561 (object->flags & OBJ_TMPFS) != 0) { 562 vp = object->un_pager.swp.swp_tmpfs; 563 vhold(vp); 564 VM_OBJECT_WUNLOCK(object); 565 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 566 VM_OBJECT_WLOCK(object); 567 if (object->type == OBJT_DEAD || 568 object->ref_count != 1) { 569 VM_OBJECT_WUNLOCK(object); 570 VOP_UNLOCK(vp, 0); 571 vdrop(vp); 572 return; 573 } 574 if ((object->flags & OBJ_TMPFS) != 0) 575 VOP_UNSET_TEXT(vp); 576 VOP_UNLOCK(vp, 0); 577 vdrop(vp); 578 } 579 if (object->shadow_count == 0 && 580 object->handle == NULL && 581 (object->type == OBJT_DEFAULT || 582 (object->type == OBJT_SWAP && 583 (object->flags & OBJ_TMPFS_NODE) == 0))) { 584 vm_object_set_flag(object, OBJ_ONEMAPPING); 585 } else if ((object->shadow_count == 1) && 586 (object->handle == NULL) && 587 (object->type == OBJT_DEFAULT || 588 object->type == OBJT_SWAP)) { 589 vm_object_t robject; 590 591 robject = LIST_FIRST(&object->shadow_head); 592 KASSERT(robject != NULL, 593 ("vm_object_deallocate: ref_count: %d, shadow_count: %d", 594 object->ref_count, 595 object->shadow_count)); 596 KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0, 597 ("shadowed tmpfs v_object %p", object)); 598 if (!VM_OBJECT_TRYWLOCK(robject)) { 599 /* 600 * Avoid a potential deadlock. 601 */ 602 object->ref_count++; 603 VM_OBJECT_WUNLOCK(object); 604 /* 605 * More likely than not the thread 606 * holding robject's lock has lower 607 * priority than the current thread. 608 * Let the lower priority thread run. 609 */ 610 pause("vmo_de", 1); 611 continue; 612 } 613 /* 614 * Collapse object into its shadow unless its 615 * shadow is dead. In that case, object will 616 * be deallocated by the thread that is 617 * deallocating its shadow. 618 */ 619 if ((robject->flags & OBJ_DEAD) == 0 && 620 (robject->handle == NULL) && 621 (robject->type == OBJT_DEFAULT || 622 robject->type == OBJT_SWAP)) { 623 624 robject->ref_count++; 625 retry: 626 if (robject->paging_in_progress) { 627 VM_OBJECT_WUNLOCK(object); 628 vm_object_pip_wait(robject, 629 "objde1"); 630 temp = robject->backing_object; 631 if (object == temp) { 632 VM_OBJECT_WLOCK(object); 633 goto retry; 634 } 635 } else if (object->paging_in_progress) { 636 VM_OBJECT_WUNLOCK(robject); 637 object->flags |= OBJ_PIPWNT; 638 VM_OBJECT_SLEEP(object, object, 639 PDROP | PVM, "objde2", 0); 640 VM_OBJECT_WLOCK(robject); 641 temp = robject->backing_object; 642 if (object == temp) { 643 VM_OBJECT_WLOCK(object); 644 goto retry; 645 } 646 } else 647 VM_OBJECT_WUNLOCK(object); 648 649 if (robject->ref_count == 1) { 650 robject->ref_count--; 651 object = robject; 652 goto doterm; 653 } 654 object = robject; 655 vm_object_collapse(object); 656 VM_OBJECT_WUNLOCK(object); 657 continue; 658 } 659 VM_OBJECT_WUNLOCK(robject); 660 } 661 VM_OBJECT_WUNLOCK(object); 662 return; 663 } 664 doterm: 665 umtx_shm_object_terminated(object); 666 temp = object->backing_object; 667 if (temp != NULL) { 668 KASSERT((object->flags & OBJ_TMPFS_NODE) == 0, 669 ("shadowed tmpfs v_object 2 %p", object)); 670 VM_OBJECT_WLOCK(temp); 671 LIST_REMOVE(object, shadow_list); 672 temp->shadow_count--; 673 VM_OBJECT_WUNLOCK(temp); 674 object->backing_object = NULL; 675 } 676 /* 677 * Don't double-terminate, we could be in a termination 678 * recursion due to the terminate having to sync data 679 * to disk. 680 */ 681 if ((object->flags & OBJ_DEAD) == 0) 682 vm_object_terminate(object); 683 else 684 VM_OBJECT_WUNLOCK(object); 685 object = temp; 686 } 687 } 688 689 /* 690 * vm_object_destroy removes the object from the global object list 691 * and frees the space for the object. 692 */ 693 void 694 vm_object_destroy(vm_object_t object) 695 { 696 697 /* 698 * Release the allocation charge. 699 */ 700 if (object->cred != NULL) { 701 swap_release_by_cred(object->charge, object->cred); 702 object->charge = 0; 703 crfree(object->cred); 704 object->cred = NULL; 705 } 706 707 /* 708 * Free the space for the object. 709 */ 710 uma_zfree(obj_zone, object); 711 } 712 713 /* 714 * vm_object_terminate_pages removes any remaining pageable pages 715 * from the object and resets the object to an empty state. 716 */ 717 static void 718 vm_object_terminate_pages(vm_object_t object) 719 { 720 vm_page_t p, p_next; 721 struct mtx *mtx, *mtx1; 722 struct vm_pagequeue *pq, *pq1; 723 int dequeued; 724 725 VM_OBJECT_ASSERT_WLOCKED(object); 726 727 mtx = NULL; 728 pq = NULL; 729 730 /* 731 * Free any remaining pageable pages. This also removes them from the 732 * paging queues. However, don't free wired pages, just remove them 733 * from the object. Rather than incrementally removing each page from 734 * the object, the page and object are reset to any empty state. 735 */ 736 TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { 737 vm_page_assert_unbusied(p); 738 if ((object->flags & OBJ_UNMANAGED) == 0) { 739 /* 740 * vm_page_free_prep() only needs the page 741 * lock for managed pages. 742 */ 743 mtx1 = vm_page_lockptr(p); 744 if (mtx1 != mtx) { 745 if (mtx != NULL) 746 mtx_unlock(mtx); 747 if (pq != NULL) { 748 vm_pagequeue_cnt_add(pq, dequeued); 749 vm_pagequeue_unlock(pq); 750 pq = NULL; 751 } 752 mtx = mtx1; 753 mtx_lock(mtx); 754 } 755 } 756 p->object = NULL; 757 if (p->wire_count != 0) 758 goto unlist; 759 VM_CNT_INC(v_pfree); 760 p->flags &= ~PG_ZERO; 761 if (p->queue != PQ_NONE) { 762 KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: " 763 "page %p is not queued", p)); 764 pq1 = vm_page_pagequeue(p); 765 if (pq != pq1) { 766 if (pq != NULL) { 767 vm_pagequeue_cnt_add(pq, dequeued); 768 vm_pagequeue_unlock(pq); 769 } 770 pq = pq1; 771 vm_pagequeue_lock(pq); 772 dequeued = 0; 773 } 774 p->queue = PQ_NONE; 775 TAILQ_REMOVE(&pq->pq_pl, p, plinks.q); 776 dequeued--; 777 } 778 if (vm_page_free_prep(p, true)) 779 continue; 780 unlist: 781 TAILQ_REMOVE(&object->memq, p, listq); 782 } 783 if (pq != NULL) { 784 vm_pagequeue_cnt_add(pq, dequeued); 785 vm_pagequeue_unlock(pq); 786 } 787 if (mtx != NULL) 788 mtx_unlock(mtx); 789 790 vm_page_free_phys_pglist(&object->memq); 791 792 /* 793 * If the object contained any pages, then reset it to an empty state. 794 * None of the object's fields, including "resident_page_count", were 795 * modified by the preceding loop. 796 */ 797 if (object->resident_page_count != 0) { 798 vm_radix_reclaim_allnodes(&object->rtree); 799 TAILQ_INIT(&object->memq); 800 object->resident_page_count = 0; 801 if (object->type == OBJT_VNODE) 802 vdrop(object->handle); 803 } 804 } 805 806 /* 807 * vm_object_terminate actually destroys the specified object, freeing 808 * up all previously used resources. 809 * 810 * The object must be locked. 811 * This routine may block. 812 */ 813 void 814 vm_object_terminate(vm_object_t object) 815 { 816 817 VM_OBJECT_ASSERT_WLOCKED(object); 818 819 /* 820 * Make sure no one uses us. 821 */ 822 vm_object_set_flag(object, OBJ_DEAD); 823 824 /* 825 * wait for the pageout daemon to be done with the object 826 */ 827 vm_object_pip_wait(object, "objtrm"); 828 829 KASSERT(!object->paging_in_progress, 830 ("vm_object_terminate: pageout in progress")); 831 832 /* 833 * Clean and free the pages, as appropriate. All references to the 834 * object are gone, so we don't need to lock it. 835 */ 836 if (object->type == OBJT_VNODE) { 837 struct vnode *vp = (struct vnode *)object->handle; 838 839 /* 840 * Clean pages and flush buffers. 841 */ 842 vm_object_page_clean(object, 0, 0, OBJPC_SYNC); 843 VM_OBJECT_WUNLOCK(object); 844 845 vinvalbuf(vp, V_SAVE, 0, 0); 846 847 BO_LOCK(&vp->v_bufobj); 848 vp->v_bufobj.bo_flag |= BO_DEAD; 849 BO_UNLOCK(&vp->v_bufobj); 850 851 VM_OBJECT_WLOCK(object); 852 } 853 854 KASSERT(object->ref_count == 0, 855 ("vm_object_terminate: object with references, ref_count=%d", 856 object->ref_count)); 857 858 if ((object->flags & OBJ_PG_DTOR) == 0) 859 vm_object_terminate_pages(object); 860 861 #if VM_NRESERVLEVEL > 0 862 if (__predict_false(!LIST_EMPTY(&object->rvq))) 863 vm_reserv_break_all(object); 864 #endif 865 866 KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT || 867 object->type == OBJT_SWAP, 868 ("%s: non-swap obj %p has cred", __func__, object)); 869 870 /* 871 * Let the pager know object is dead. 872 */ 873 vm_pager_deallocate(object); 874 VM_OBJECT_WUNLOCK(object); 875 876 vm_object_destroy(object); 877 } 878 879 /* 880 * Make the page read-only so that we can clear the object flags. However, if 881 * this is a nosync mmap then the object is likely to stay dirty so do not 882 * mess with the page and do not clear the object flags. Returns TRUE if the 883 * page should be flushed, and FALSE otherwise. 884 */ 885 static boolean_t 886 vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags) 887 { 888 889 /* 890 * If we have been asked to skip nosync pages and this is a 891 * nosync page, skip it. Note that the object flags were not 892 * cleared in this case so we do not have to set them. 893 */ 894 if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) { 895 *clearobjflags = FALSE; 896 return (FALSE); 897 } else { 898 pmap_remove_write(p); 899 return (p->dirty != 0); 900 } 901 } 902 903 /* 904 * vm_object_page_clean 905 * 906 * Clean all dirty pages in the specified range of object. Leaves page 907 * on whatever queue it is currently on. If NOSYNC is set then do not 908 * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC), 909 * leaving the object dirty. 910 * 911 * When stuffing pages asynchronously, allow clustering. XXX we need a 912 * synchronous clustering mode implementation. 913 * 914 * Odd semantics: if start == end, we clean everything. 915 * 916 * The object must be locked. 917 * 918 * Returns FALSE if some page from the range was not written, as 919 * reported by the pager, and TRUE otherwise. 920 */ 921 boolean_t 922 vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, 923 int flags) 924 { 925 vm_page_t np, p; 926 vm_pindex_t pi, tend, tstart; 927 int curgeneration, n, pagerflags; 928 boolean_t clearobjflags, eio, res; 929 930 VM_OBJECT_ASSERT_WLOCKED(object); 931 932 /* 933 * The OBJ_MIGHTBEDIRTY flag is only set for OBJT_VNODE 934 * objects. The check below prevents the function from 935 * operating on non-vnode objects. 936 */ 937 if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 || 938 object->resident_page_count == 0) 939 return (TRUE); 940 941 pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? 942 VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; 943 pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; 944 945 tstart = OFF_TO_IDX(start); 946 tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK); 947 clearobjflags = tstart == 0 && tend >= object->size; 948 res = TRUE; 949 950 rescan: 951 curgeneration = object->generation; 952 953 for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { 954 pi = p->pindex; 955 if (pi >= tend) 956 break; 957 np = TAILQ_NEXT(p, listq); 958 if (p->valid == 0) 959 continue; 960 if (vm_page_sleep_if_busy(p, "vpcwai")) { 961 if (object->generation != curgeneration) { 962 if ((flags & OBJPC_SYNC) != 0) 963 goto rescan; 964 else 965 clearobjflags = FALSE; 966 } 967 np = vm_page_find_least(object, pi); 968 continue; 969 } 970 if (!vm_object_page_remove_write(p, flags, &clearobjflags)) 971 continue; 972 973 n = vm_object_page_collect_flush(object, p, pagerflags, 974 flags, &clearobjflags, &eio); 975 if (eio) { 976 res = FALSE; 977 clearobjflags = FALSE; 978 } 979 if (object->generation != curgeneration) { 980 if ((flags & OBJPC_SYNC) != 0) 981 goto rescan; 982 else 983 clearobjflags = FALSE; 984 } 985 986 /* 987 * If the VOP_PUTPAGES() did a truncated write, so 988 * that even the first page of the run is not fully 989 * written, vm_pageout_flush() returns 0 as the run 990 * length. Since the condition that caused truncated 991 * write may be permanent, e.g. exhausted free space, 992 * accepting n == 0 would cause an infinite loop. 993 * 994 * Forwarding the iterator leaves the unwritten page 995 * behind, but there is not much we can do there if 996 * filesystem refuses to write it. 997 */ 998 if (n == 0) { 999 n = 1; 1000 clearobjflags = FALSE; 1001 } 1002 np = vm_page_find_least(object, pi + n); 1003 } 1004 #if 0 1005 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); 1006 #endif 1007 1008 if (clearobjflags) 1009 vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY); 1010 return (res); 1011 } 1012 1013 static int 1014 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, 1015 int flags, boolean_t *clearobjflags, boolean_t *eio) 1016 { 1017 vm_page_t ma[vm_pageout_page_count], p_first, tp; 1018 int count, i, mreq, runlen; 1019 1020 vm_page_lock_assert(p, MA_NOTOWNED); 1021 VM_OBJECT_ASSERT_WLOCKED(object); 1022 1023 count = 1; 1024 mreq = 0; 1025 1026 for (tp = p; count < vm_pageout_page_count; count++) { 1027 tp = vm_page_next(tp); 1028 if (tp == NULL || vm_page_busied(tp)) 1029 break; 1030 if (!vm_object_page_remove_write(tp, flags, clearobjflags)) 1031 break; 1032 } 1033 1034 for (p_first = p; count < vm_pageout_page_count; count++) { 1035 tp = vm_page_prev(p_first); 1036 if (tp == NULL || vm_page_busied(tp)) 1037 break; 1038 if (!vm_object_page_remove_write(tp, flags, clearobjflags)) 1039 break; 1040 p_first = tp; 1041 mreq++; 1042 } 1043 1044 for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) 1045 ma[i] = tp; 1046 1047 vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio); 1048 return (runlen); 1049 } 1050 1051 /* 1052 * Note that there is absolutely no sense in writing out 1053 * anonymous objects, so we track down the vnode object 1054 * to write out. 1055 * We invalidate (remove) all pages from the address space 1056 * for semantic correctness. 1057 * 1058 * If the backing object is a device object with unmanaged pages, then any 1059 * mappings to the specified range of pages must be removed before this 1060 * function is called. 1061 * 1062 * Note: certain anonymous maps, such as MAP_NOSYNC maps, 1063 * may start out with a NULL object. 1064 */ 1065 boolean_t 1066 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, 1067 boolean_t syncio, boolean_t invalidate) 1068 { 1069 vm_object_t backing_object; 1070 struct vnode *vp; 1071 struct mount *mp; 1072 int error, flags, fsync_after; 1073 boolean_t res; 1074 1075 if (object == NULL) 1076 return (TRUE); 1077 res = TRUE; 1078 error = 0; 1079 VM_OBJECT_WLOCK(object); 1080 while ((backing_object = object->backing_object) != NULL) { 1081 VM_OBJECT_WLOCK(backing_object); 1082 offset += object->backing_object_offset; 1083 VM_OBJECT_WUNLOCK(object); 1084 object = backing_object; 1085 if (object->size < OFF_TO_IDX(offset + size)) 1086 size = IDX_TO_OFF(object->size) - offset; 1087 } 1088 /* 1089 * Flush pages if writing is allowed, invalidate them 1090 * if invalidation requested. Pages undergoing I/O 1091 * will be ignored by vm_object_page_remove(). 1092 * 1093 * We cannot lock the vnode and then wait for paging 1094 * to complete without deadlocking against vm_fault. 1095 * Instead we simply call vm_object_page_remove() and 1096 * allow it to block internally on a page-by-page 1097 * basis when it encounters pages undergoing async 1098 * I/O. 1099 */ 1100 if (object->type == OBJT_VNODE && 1101 (object->flags & OBJ_MIGHTBEDIRTY) != 0 && 1102 ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) { 1103 VM_OBJECT_WUNLOCK(object); 1104 (void) vn_start_write(vp, &mp, V_WAIT); 1105 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1106 if (syncio && !invalidate && offset == 0 && 1107 atop(size) == object->size) { 1108 /* 1109 * If syncing the whole mapping of the file, 1110 * it is faster to schedule all the writes in 1111 * async mode, also allowing the clustering, 1112 * and then wait for i/o to complete. 1113 */ 1114 flags = 0; 1115 fsync_after = TRUE; 1116 } else { 1117 flags = (syncio || invalidate) ? OBJPC_SYNC : 0; 1118 flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0; 1119 fsync_after = FALSE; 1120 } 1121 VM_OBJECT_WLOCK(object); 1122 res = vm_object_page_clean(object, offset, offset + size, 1123 flags); 1124 VM_OBJECT_WUNLOCK(object); 1125 if (fsync_after) 1126 error = VOP_FSYNC(vp, MNT_WAIT, curthread); 1127 VOP_UNLOCK(vp, 0); 1128 vn_finished_write(mp); 1129 if (error != 0) 1130 res = FALSE; 1131 VM_OBJECT_WLOCK(object); 1132 } 1133 if ((object->type == OBJT_VNODE || 1134 object->type == OBJT_DEVICE) && invalidate) { 1135 if (object->type == OBJT_DEVICE) 1136 /* 1137 * The option OBJPR_NOTMAPPED must be passed here 1138 * because vm_object_page_remove() cannot remove 1139 * unmanaged mappings. 1140 */ 1141 flags = OBJPR_NOTMAPPED; 1142 else if (old_msync) 1143 flags = 0; 1144 else 1145 flags = OBJPR_CLEANONLY; 1146 vm_object_page_remove(object, OFF_TO_IDX(offset), 1147 OFF_TO_IDX(offset + size + PAGE_MASK), flags); 1148 } 1149 VM_OBJECT_WUNLOCK(object); 1150 return (res); 1151 } 1152 1153 /* 1154 * Determine whether the given advice can be applied to the object. Advice is 1155 * not applied to unmanaged pages since they never belong to page queues, and 1156 * since MADV_FREE is destructive, it can apply only to anonymous pages that 1157 * have been mapped at most once. 1158 */ 1159 static bool 1160 vm_object_advice_applies(vm_object_t object, int advice) 1161 { 1162 1163 if ((object->flags & OBJ_UNMANAGED) != 0) 1164 return (false); 1165 if (advice != MADV_FREE) 1166 return (true); 1167 return ((object->type == OBJT_DEFAULT || object->type == OBJT_SWAP) && 1168 (object->flags & OBJ_ONEMAPPING) != 0); 1169 } 1170 1171 static void 1172 vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, 1173 vm_size_t size) 1174 { 1175 1176 if (advice == MADV_FREE && object->type == OBJT_SWAP) 1177 swap_pager_freespace(object, pindex, size); 1178 } 1179 1180 /* 1181 * vm_object_madvise: 1182 * 1183 * Implements the madvise function at the object/page level. 1184 * 1185 * MADV_WILLNEED (any object) 1186 * 1187 * Activate the specified pages if they are resident. 1188 * 1189 * MADV_DONTNEED (any object) 1190 * 1191 * Deactivate the specified pages if they are resident. 1192 * 1193 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, 1194 * OBJ_ONEMAPPING only) 1195 * 1196 * Deactivate and clean the specified pages if they are 1197 * resident. This permits the process to reuse the pages 1198 * without faulting or the kernel to reclaim the pages 1199 * without I/O. 1200 */ 1201 void 1202 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, 1203 int advice) 1204 { 1205 vm_pindex_t tpindex; 1206 vm_object_t backing_object, tobject; 1207 vm_page_t m, tm; 1208 1209 if (object == NULL) 1210 return; 1211 1212 relookup: 1213 VM_OBJECT_WLOCK(object); 1214 if (!vm_object_advice_applies(object, advice)) { 1215 VM_OBJECT_WUNLOCK(object); 1216 return; 1217 } 1218 for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { 1219 tobject = object; 1220 1221 /* 1222 * If the next page isn't resident in the top-level object, we 1223 * need to search the shadow chain. When applying MADV_FREE, we 1224 * take care to release any swap space used to store 1225 * non-resident pages. 1226 */ 1227 if (m == NULL || pindex < m->pindex) { 1228 /* 1229 * Optimize a common case: if the top-level object has 1230 * no backing object, we can skip over the non-resident 1231 * range in constant time. 1232 */ 1233 if (object->backing_object == NULL) { 1234 tpindex = (m != NULL && m->pindex < end) ? 1235 m->pindex : end; 1236 vm_object_madvise_freespace(object, advice, 1237 pindex, tpindex - pindex); 1238 if ((pindex = tpindex) == end) 1239 break; 1240 goto next_page; 1241 } 1242 1243 tpindex = pindex; 1244 do { 1245 vm_object_madvise_freespace(tobject, advice, 1246 tpindex, 1); 1247 /* 1248 * Prepare to search the next object in the 1249 * chain. 1250 */ 1251 backing_object = tobject->backing_object; 1252 if (backing_object == NULL) 1253 goto next_pindex; 1254 VM_OBJECT_WLOCK(backing_object); 1255 tpindex += 1256 OFF_TO_IDX(tobject->backing_object_offset); 1257 if (tobject != object) 1258 VM_OBJECT_WUNLOCK(tobject); 1259 tobject = backing_object; 1260 if (!vm_object_advice_applies(tobject, advice)) 1261 goto next_pindex; 1262 } while ((tm = vm_page_lookup(tobject, tpindex)) == 1263 NULL); 1264 } else { 1265 next_page: 1266 tm = m; 1267 m = TAILQ_NEXT(m, listq); 1268 } 1269 1270 /* 1271 * If the page is not in a normal state, skip it. 1272 */ 1273 if (tm->valid != VM_PAGE_BITS_ALL) 1274 goto next_pindex; 1275 vm_page_lock(tm); 1276 if (tm->hold_count != 0 || tm->wire_count != 0) { 1277 vm_page_unlock(tm); 1278 goto next_pindex; 1279 } 1280 KASSERT((tm->flags & PG_FICTITIOUS) == 0, 1281 ("vm_object_madvise: page %p is fictitious", tm)); 1282 KASSERT((tm->oflags & VPO_UNMANAGED) == 0, 1283 ("vm_object_madvise: page %p is not managed", tm)); 1284 if (vm_page_busied(tm)) { 1285 if (object != tobject) 1286 VM_OBJECT_WUNLOCK(tobject); 1287 VM_OBJECT_WUNLOCK(object); 1288 if (advice == MADV_WILLNEED) { 1289 /* 1290 * Reference the page before unlocking and 1291 * sleeping so that the page daemon is less 1292 * likely to reclaim it. 1293 */ 1294 vm_page_aflag_set(tm, PGA_REFERENCED); 1295 } 1296 vm_page_busy_sleep(tm, "madvpo", false); 1297 goto relookup; 1298 } 1299 vm_page_advise(tm, advice); 1300 vm_page_unlock(tm); 1301 vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); 1302 next_pindex: 1303 if (tobject != object) 1304 VM_OBJECT_WUNLOCK(tobject); 1305 } 1306 VM_OBJECT_WUNLOCK(object); 1307 } 1308 1309 /* 1310 * vm_object_shadow: 1311 * 1312 * Create a new object which is backed by the 1313 * specified existing object range. The source 1314 * object reference is deallocated. 1315 * 1316 * The new object and offset into that object 1317 * are returned in the source parameters. 1318 */ 1319 void 1320 vm_object_shadow( 1321 vm_object_t *object, /* IN/OUT */ 1322 vm_ooffset_t *offset, /* IN/OUT */ 1323 vm_size_t length) 1324 { 1325 vm_object_t source; 1326 vm_object_t result; 1327 1328 source = *object; 1329 1330 /* 1331 * Don't create the new object if the old object isn't shared. 1332 */ 1333 if (source != NULL) { 1334 VM_OBJECT_WLOCK(source); 1335 if (source->ref_count == 1 && 1336 source->handle == NULL && 1337 (source->type == OBJT_DEFAULT || 1338 source->type == OBJT_SWAP)) { 1339 VM_OBJECT_WUNLOCK(source); 1340 return; 1341 } 1342 VM_OBJECT_WUNLOCK(source); 1343 } 1344 1345 /* 1346 * Allocate a new object with the given length. 1347 */ 1348 result = vm_object_allocate(OBJT_DEFAULT, atop(length)); 1349 1350 /* 1351 * The new object shadows the source object, adding a reference to it. 1352 * Our caller changes his reference to point to the new object, 1353 * removing a reference to the source object. Net result: no change 1354 * of reference count. 1355 * 1356 * Try to optimize the result object's page color when shadowing 1357 * in order to maintain page coloring consistency in the combined 1358 * shadowed object. 1359 */ 1360 result->backing_object = source; 1361 /* 1362 * Store the offset into the source object, and fix up the offset into 1363 * the new object. 1364 */ 1365 result->backing_object_offset = *offset; 1366 if (source != NULL) { 1367 VM_OBJECT_WLOCK(source); 1368 result->domain = source->domain; 1369 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); 1370 source->shadow_count++; 1371 #if VM_NRESERVLEVEL > 0 1372 result->flags |= source->flags & OBJ_COLORED; 1373 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & 1374 ((1 << (VM_NFREEORDER - 1)) - 1); 1375 #endif 1376 VM_OBJECT_WUNLOCK(source); 1377 } 1378 1379 1380 /* 1381 * Return the new things 1382 */ 1383 *offset = 0; 1384 *object = result; 1385 } 1386 1387 /* 1388 * vm_object_split: 1389 * 1390 * Split the pages in a map entry into a new object. This affords 1391 * easier removal of unused pages, and keeps object inheritance from 1392 * being a negative impact on memory usage. 1393 */ 1394 void 1395 vm_object_split(vm_map_entry_t entry) 1396 { 1397 vm_page_t m, m_next; 1398 vm_object_t orig_object, new_object, source; 1399 vm_pindex_t idx, offidxstart; 1400 vm_size_t size; 1401 1402 orig_object = entry->object.vm_object; 1403 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) 1404 return; 1405 if (orig_object->ref_count <= 1) 1406 return; 1407 VM_OBJECT_WUNLOCK(orig_object); 1408 1409 offidxstart = OFF_TO_IDX(entry->offset); 1410 size = atop(entry->end - entry->start); 1411 1412 /* 1413 * If swap_pager_copy() is later called, it will convert new_object 1414 * into a swap object. 1415 */ 1416 new_object = vm_object_allocate(OBJT_DEFAULT, size); 1417 1418 /* 1419 * At this point, the new object is still private, so the order in 1420 * which the original and new objects are locked does not matter. 1421 */ 1422 VM_OBJECT_WLOCK(new_object); 1423 VM_OBJECT_WLOCK(orig_object); 1424 new_object->domain = orig_object->domain; 1425 source = orig_object->backing_object; 1426 if (source != NULL) { 1427 VM_OBJECT_WLOCK(source); 1428 if ((source->flags & OBJ_DEAD) != 0) { 1429 VM_OBJECT_WUNLOCK(source); 1430 VM_OBJECT_WUNLOCK(orig_object); 1431 VM_OBJECT_WUNLOCK(new_object); 1432 vm_object_deallocate(new_object); 1433 VM_OBJECT_WLOCK(orig_object); 1434 return; 1435 } 1436 LIST_INSERT_HEAD(&source->shadow_head, 1437 new_object, shadow_list); 1438 source->shadow_count++; 1439 vm_object_reference_locked(source); /* for new_object */ 1440 vm_object_clear_flag(source, OBJ_ONEMAPPING); 1441 VM_OBJECT_WUNLOCK(source); 1442 new_object->backing_object_offset = 1443 orig_object->backing_object_offset + entry->offset; 1444 new_object->backing_object = source; 1445 } 1446 if (orig_object->cred != NULL) { 1447 new_object->cred = orig_object->cred; 1448 crhold(orig_object->cred); 1449 new_object->charge = ptoa(size); 1450 KASSERT(orig_object->charge >= ptoa(size), 1451 ("orig_object->charge < 0")); 1452 orig_object->charge -= ptoa(size); 1453 } 1454 retry: 1455 m = vm_page_find_least(orig_object, offidxstart); 1456 for (; m != NULL && (idx = m->pindex - offidxstart) < size; 1457 m = m_next) { 1458 m_next = TAILQ_NEXT(m, listq); 1459 1460 /* 1461 * We must wait for pending I/O to complete before we can 1462 * rename the page. 1463 * 1464 * We do not have to VM_PROT_NONE the page as mappings should 1465 * not be changed by this operation. 1466 */ 1467 if (vm_page_busied(m)) { 1468 VM_OBJECT_WUNLOCK(new_object); 1469 vm_page_lock(m); 1470 VM_OBJECT_WUNLOCK(orig_object); 1471 vm_page_busy_sleep(m, "spltwt", false); 1472 VM_OBJECT_WLOCK(orig_object); 1473 VM_OBJECT_WLOCK(new_object); 1474 goto retry; 1475 } 1476 1477 /* vm_page_rename() will dirty the page. */ 1478 if (vm_page_rename(m, new_object, idx)) { 1479 VM_OBJECT_WUNLOCK(new_object); 1480 VM_OBJECT_WUNLOCK(orig_object); 1481 vm_radix_wait(); 1482 VM_OBJECT_WLOCK(orig_object); 1483 VM_OBJECT_WLOCK(new_object); 1484 goto retry; 1485 } 1486 #if VM_NRESERVLEVEL > 0 1487 /* 1488 * If some of the reservation's allocated pages remain with 1489 * the original object, then transferring the reservation to 1490 * the new object is neither particularly beneficial nor 1491 * particularly harmful as compared to leaving the reservation 1492 * with the original object. If, however, all of the 1493 * reservation's allocated pages are transferred to the new 1494 * object, then transferring the reservation is typically 1495 * beneficial. Determining which of these two cases applies 1496 * would be more costly than unconditionally renaming the 1497 * reservation. 1498 */ 1499 vm_reserv_rename(m, new_object, orig_object, offidxstart); 1500 #endif 1501 if (orig_object->type == OBJT_SWAP) 1502 vm_page_xbusy(m); 1503 } 1504 if (orig_object->type == OBJT_SWAP) { 1505 /* 1506 * swap_pager_copy() can sleep, in which case the orig_object's 1507 * and new_object's locks are released and reacquired. 1508 */ 1509 swap_pager_copy(orig_object, new_object, offidxstart, 0); 1510 TAILQ_FOREACH(m, &new_object->memq, listq) 1511 vm_page_xunbusy(m); 1512 } 1513 VM_OBJECT_WUNLOCK(orig_object); 1514 VM_OBJECT_WUNLOCK(new_object); 1515 entry->object.vm_object = new_object; 1516 entry->offset = 0LL; 1517 vm_object_deallocate(orig_object); 1518 VM_OBJECT_WLOCK(new_object); 1519 } 1520 1521 #define OBSC_COLLAPSE_NOWAIT 0x0002 1522 #define OBSC_COLLAPSE_WAIT 0x0004 1523 1524 static vm_page_t 1525 vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, 1526 int op) 1527 { 1528 vm_object_t backing_object; 1529 1530 VM_OBJECT_ASSERT_WLOCKED(object); 1531 backing_object = object->backing_object; 1532 VM_OBJECT_ASSERT_WLOCKED(backing_object); 1533 1534 KASSERT(p == NULL || vm_page_busied(p), ("unbusy page %p", p)); 1535 KASSERT(p == NULL || p->object == object || p->object == backing_object, 1536 ("invalid ownership %p %p %p", p, object, backing_object)); 1537 if ((op & OBSC_COLLAPSE_NOWAIT) != 0) 1538 return (next); 1539 if (p != NULL) 1540 vm_page_lock(p); 1541 VM_OBJECT_WUNLOCK(object); 1542 VM_OBJECT_WUNLOCK(backing_object); 1543 /* The page is only NULL when rename fails. */ 1544 if (p == NULL) 1545 vm_radix_wait(); 1546 else 1547 vm_page_busy_sleep(p, "vmocol", false); 1548 VM_OBJECT_WLOCK(object); 1549 VM_OBJECT_WLOCK(backing_object); 1550 return (TAILQ_FIRST(&backing_object->memq)); 1551 } 1552 1553 static bool 1554 vm_object_scan_all_shadowed(vm_object_t object) 1555 { 1556 vm_object_t backing_object; 1557 vm_page_t p, pp; 1558 vm_pindex_t backing_offset_index, new_pindex, pi, ps; 1559 1560 VM_OBJECT_ASSERT_WLOCKED(object); 1561 VM_OBJECT_ASSERT_WLOCKED(object->backing_object); 1562 1563 backing_object = object->backing_object; 1564 1565 if (backing_object->type != OBJT_DEFAULT && 1566 backing_object->type != OBJT_SWAP) 1567 return (false); 1568 1569 pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); 1570 p = vm_page_find_least(backing_object, pi); 1571 ps = swap_pager_find_least(backing_object, pi); 1572 1573 /* 1574 * Only check pages inside the parent object's range and 1575 * inside the parent object's mapping of the backing object. 1576 */ 1577 for (;; pi++) { 1578 if (p != NULL && p->pindex < pi) 1579 p = TAILQ_NEXT(p, listq); 1580 if (ps < pi) 1581 ps = swap_pager_find_least(backing_object, pi); 1582 if (p == NULL && ps >= backing_object->size) 1583 break; 1584 else if (p == NULL) 1585 pi = ps; 1586 else 1587 pi = MIN(p->pindex, ps); 1588 1589 new_pindex = pi - backing_offset_index; 1590 if (new_pindex >= object->size) 1591 break; 1592 1593 /* 1594 * See if the parent has the page or if the parent's object 1595 * pager has the page. If the parent has the page but the page 1596 * is not valid, the parent's object pager must have the page. 1597 * 1598 * If this fails, the parent does not completely shadow the 1599 * object and we might as well give up now. 1600 */ 1601 pp = vm_page_lookup(object, new_pindex); 1602 if ((pp == NULL || pp->valid == 0) && 1603 !vm_pager_has_page(object, new_pindex, NULL, NULL)) 1604 return (false); 1605 } 1606 return (true); 1607 } 1608 1609 static bool 1610 vm_object_collapse_scan(vm_object_t object, int op) 1611 { 1612 vm_object_t backing_object; 1613 vm_page_t next, p, pp; 1614 vm_pindex_t backing_offset_index, new_pindex; 1615 1616 VM_OBJECT_ASSERT_WLOCKED(object); 1617 VM_OBJECT_ASSERT_WLOCKED(object->backing_object); 1618 1619 backing_object = object->backing_object; 1620 backing_offset_index = OFF_TO_IDX(object->backing_object_offset); 1621 1622 /* 1623 * Initial conditions 1624 */ 1625 if ((op & OBSC_COLLAPSE_WAIT) != 0) 1626 vm_object_set_flag(backing_object, OBJ_DEAD); 1627 1628 /* 1629 * Our scan 1630 */ 1631 for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { 1632 next = TAILQ_NEXT(p, listq); 1633 new_pindex = p->pindex - backing_offset_index; 1634 1635 /* 1636 * Check for busy page 1637 */ 1638 if (vm_page_busied(p)) { 1639 next = vm_object_collapse_scan_wait(object, p, next, op); 1640 continue; 1641 } 1642 1643 KASSERT(p->object == backing_object, 1644 ("vm_object_collapse_scan: object mismatch")); 1645 1646 if (p->pindex < backing_offset_index || 1647 new_pindex >= object->size) { 1648 if (backing_object->type == OBJT_SWAP) 1649 swap_pager_freespace(backing_object, p->pindex, 1650 1); 1651 1652 /* 1653 * Page is out of the parent object's range, we can 1654 * simply destroy it. 1655 */ 1656 vm_page_lock(p); 1657 KASSERT(!pmap_page_is_mapped(p), 1658 ("freeing mapped page %p", p)); 1659 if (p->wire_count == 0) 1660 vm_page_free(p); 1661 else 1662 vm_page_remove(p); 1663 vm_page_unlock(p); 1664 continue; 1665 } 1666 1667 pp = vm_page_lookup(object, new_pindex); 1668 if (pp != NULL && vm_page_busied(pp)) { 1669 /* 1670 * The page in the parent is busy and possibly not 1671 * (yet) valid. Until its state is finalized by the 1672 * busy bit owner, we can't tell whether it shadows the 1673 * original page. Therefore, we must either skip it 1674 * and the original (backing_object) page or wait for 1675 * its state to be finalized. 1676 * 1677 * This is due to a race with vm_fault() where we must 1678 * unbusy the original (backing_obj) page before we can 1679 * (re)lock the parent. Hence we can get here. 1680 */ 1681 next = vm_object_collapse_scan_wait(object, pp, next, 1682 op); 1683 continue; 1684 } 1685 1686 KASSERT(pp == NULL || pp->valid != 0, 1687 ("unbusy invalid page %p", pp)); 1688 1689 if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, 1690 NULL)) { 1691 /* 1692 * The page already exists in the parent OR swap exists 1693 * for this location in the parent. Leave the parent's 1694 * page alone. Destroy the original page from the 1695 * backing object. 1696 */ 1697 if (backing_object->type == OBJT_SWAP) 1698 swap_pager_freespace(backing_object, p->pindex, 1699 1); 1700 vm_page_lock(p); 1701 KASSERT(!pmap_page_is_mapped(p), 1702 ("freeing mapped page %p", p)); 1703 if (p->wire_count == 0) 1704 vm_page_free(p); 1705 else 1706 vm_page_remove(p); 1707 vm_page_unlock(p); 1708 continue; 1709 } 1710 1711 /* 1712 * Page does not exist in parent, rename the page from the 1713 * backing object to the main object. 1714 * 1715 * If the page was mapped to a process, it can remain mapped 1716 * through the rename. vm_page_rename() will dirty the page. 1717 */ 1718 if (vm_page_rename(p, object, new_pindex)) { 1719 next = vm_object_collapse_scan_wait(object, NULL, next, 1720 op); 1721 continue; 1722 } 1723 1724 /* Use the old pindex to free the right page. */ 1725 if (backing_object->type == OBJT_SWAP) 1726 swap_pager_freespace(backing_object, 1727 new_pindex + backing_offset_index, 1); 1728 1729 #if VM_NRESERVLEVEL > 0 1730 /* 1731 * Rename the reservation. 1732 */ 1733 vm_reserv_rename(p, object, backing_object, 1734 backing_offset_index); 1735 #endif 1736 } 1737 return (true); 1738 } 1739 1740 1741 /* 1742 * this version of collapse allows the operation to occur earlier and 1743 * when paging_in_progress is true for an object... This is not a complete 1744 * operation, but should plug 99.9% of the rest of the leaks. 1745 */ 1746 static void 1747 vm_object_qcollapse(vm_object_t object) 1748 { 1749 vm_object_t backing_object = object->backing_object; 1750 1751 VM_OBJECT_ASSERT_WLOCKED(object); 1752 VM_OBJECT_ASSERT_WLOCKED(backing_object); 1753 1754 if (backing_object->ref_count != 1) 1755 return; 1756 1757 vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT); 1758 } 1759 1760 /* 1761 * vm_object_collapse: 1762 * 1763 * Collapse an object with the object backing it. 1764 * Pages in the backing object are moved into the 1765 * parent, and the backing object is deallocated. 1766 */ 1767 void 1768 vm_object_collapse(vm_object_t object) 1769 { 1770 vm_object_t backing_object, new_backing_object; 1771 1772 VM_OBJECT_ASSERT_WLOCKED(object); 1773 1774 while (TRUE) { 1775 /* 1776 * Verify that the conditions are right for collapse: 1777 * 1778 * The object exists and the backing object exists. 1779 */ 1780 if ((backing_object = object->backing_object) == NULL) 1781 break; 1782 1783 /* 1784 * we check the backing object first, because it is most likely 1785 * not collapsable. 1786 */ 1787 VM_OBJECT_WLOCK(backing_object); 1788 if (backing_object->handle != NULL || 1789 (backing_object->type != OBJT_DEFAULT && 1790 backing_object->type != OBJT_SWAP) || 1791 (backing_object->flags & OBJ_DEAD) || 1792 object->handle != NULL || 1793 (object->type != OBJT_DEFAULT && 1794 object->type != OBJT_SWAP) || 1795 (object->flags & OBJ_DEAD)) { 1796 VM_OBJECT_WUNLOCK(backing_object); 1797 break; 1798 } 1799 1800 if (object->paging_in_progress != 0 || 1801 backing_object->paging_in_progress != 0) { 1802 vm_object_qcollapse(object); 1803 VM_OBJECT_WUNLOCK(backing_object); 1804 break; 1805 } 1806 1807 /* 1808 * We know that we can either collapse the backing object (if 1809 * the parent is the only reference to it) or (perhaps) have 1810 * the parent bypass the object if the parent happens to shadow 1811 * all the resident pages in the entire backing object. 1812 * 1813 * This is ignoring pager-backed pages such as swap pages. 1814 * vm_object_collapse_scan fails the shadowing test in this 1815 * case. 1816 */ 1817 if (backing_object->ref_count == 1) { 1818 vm_object_pip_add(object, 1); 1819 vm_object_pip_add(backing_object, 1); 1820 1821 /* 1822 * If there is exactly one reference to the backing 1823 * object, we can collapse it into the parent. 1824 */ 1825 vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT); 1826 1827 #if VM_NRESERVLEVEL > 0 1828 /* 1829 * Break any reservations from backing_object. 1830 */ 1831 if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) 1832 vm_reserv_break_all(backing_object); 1833 #endif 1834 1835 /* 1836 * Move the pager from backing_object to object. 1837 */ 1838 if (backing_object->type == OBJT_SWAP) { 1839 /* 1840 * swap_pager_copy() can sleep, in which case 1841 * the backing_object's and object's locks are 1842 * released and reacquired. 1843 * Since swap_pager_copy() is being asked to 1844 * destroy the source, it will change the 1845 * backing_object's type to OBJT_DEFAULT. 1846 */ 1847 swap_pager_copy( 1848 backing_object, 1849 object, 1850 OFF_TO_IDX(object->backing_object_offset), TRUE); 1851 } 1852 /* 1853 * Object now shadows whatever backing_object did. 1854 * Note that the reference to 1855 * backing_object->backing_object moves from within 1856 * backing_object to within object. 1857 */ 1858 LIST_REMOVE(object, shadow_list); 1859 backing_object->shadow_count--; 1860 if (backing_object->backing_object) { 1861 VM_OBJECT_WLOCK(backing_object->backing_object); 1862 LIST_REMOVE(backing_object, shadow_list); 1863 LIST_INSERT_HEAD( 1864 &backing_object->backing_object->shadow_head, 1865 object, shadow_list); 1866 /* 1867 * The shadow_count has not changed. 1868 */ 1869 VM_OBJECT_WUNLOCK(backing_object->backing_object); 1870 } 1871 object->backing_object = backing_object->backing_object; 1872 object->backing_object_offset += 1873 backing_object->backing_object_offset; 1874 1875 /* 1876 * Discard backing_object. 1877 * 1878 * Since the backing object has no pages, no pager left, 1879 * and no object references within it, all that is 1880 * necessary is to dispose of it. 1881 */ 1882 KASSERT(backing_object->ref_count == 1, ( 1883 "backing_object %p was somehow re-referenced during collapse!", 1884 backing_object)); 1885 vm_object_pip_wakeup(backing_object); 1886 backing_object->type = OBJT_DEAD; 1887 backing_object->ref_count = 0; 1888 VM_OBJECT_WUNLOCK(backing_object); 1889 vm_object_destroy(backing_object); 1890 1891 vm_object_pip_wakeup(object); 1892 counter_u64_add(object_collapses, 1); 1893 } else { 1894 /* 1895 * If we do not entirely shadow the backing object, 1896 * there is nothing we can do so we give up. 1897 */ 1898 if (object->resident_page_count != object->size && 1899 !vm_object_scan_all_shadowed(object)) { 1900 VM_OBJECT_WUNLOCK(backing_object); 1901 break; 1902 } 1903 1904 /* 1905 * Make the parent shadow the next object in the 1906 * chain. Deallocating backing_object will not remove 1907 * it, since its reference count is at least 2. 1908 */ 1909 LIST_REMOVE(object, shadow_list); 1910 backing_object->shadow_count--; 1911 1912 new_backing_object = backing_object->backing_object; 1913 if ((object->backing_object = new_backing_object) != NULL) { 1914 VM_OBJECT_WLOCK(new_backing_object); 1915 LIST_INSERT_HEAD( 1916 &new_backing_object->shadow_head, 1917 object, 1918 shadow_list 1919 ); 1920 new_backing_object->shadow_count++; 1921 vm_object_reference_locked(new_backing_object); 1922 VM_OBJECT_WUNLOCK(new_backing_object); 1923 object->backing_object_offset += 1924 backing_object->backing_object_offset; 1925 } 1926 1927 /* 1928 * Drop the reference count on backing_object. Since 1929 * its ref_count was at least 2, it will not vanish. 1930 */ 1931 backing_object->ref_count--; 1932 VM_OBJECT_WUNLOCK(backing_object); 1933 counter_u64_add(object_bypasses, 1); 1934 } 1935 1936 /* 1937 * Try again with this object's new backing object. 1938 */ 1939 } 1940 } 1941 1942 /* 1943 * vm_object_page_remove: 1944 * 1945 * For the given object, either frees or invalidates each of the 1946 * specified pages. In general, a page is freed. However, if a page is 1947 * wired for any reason other than the existence of a managed, wired 1948 * mapping, then it may be invalidated but not removed from the object. 1949 * Pages are specified by the given range ["start", "end") and the option 1950 * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range 1951 * extends from "start" to the end of the object. If the option 1952 * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the 1953 * specified range are affected. If the option OBJPR_NOTMAPPED is 1954 * specified, then the pages within the specified range must have no 1955 * mappings. Otherwise, if this option is not specified, any mappings to 1956 * the specified pages are removed before the pages are freed or 1957 * invalidated. 1958 * 1959 * In general, this operation should only be performed on objects that 1960 * contain managed pages. There are, however, two exceptions. First, it 1961 * is performed on the kernel and kmem objects by vm_map_entry_delete(). 1962 * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- 1963 * backed pages. In both of these cases, the option OBJPR_CLEANONLY must 1964 * not be specified and the option OBJPR_NOTMAPPED must be specified. 1965 * 1966 * The object must be locked. 1967 */ 1968 void 1969 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, 1970 int options) 1971 { 1972 vm_page_t p, next; 1973 struct mtx *mtx; 1974 struct pglist pgl; 1975 1976 VM_OBJECT_ASSERT_WLOCKED(object); 1977 KASSERT((object->flags & OBJ_UNMANAGED) == 0 || 1978 (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, 1979 ("vm_object_page_remove: illegal options for object %p", object)); 1980 if (object->resident_page_count == 0) 1981 return; 1982 vm_object_pip_add(object, 1); 1983 TAILQ_INIT(&pgl); 1984 again: 1985 p = vm_page_find_least(object, start); 1986 mtx = NULL; 1987 1988 /* 1989 * Here, the variable "p" is either (1) the page with the least pindex 1990 * greater than or equal to the parameter "start" or (2) NULL. 1991 */ 1992 for (; p != NULL && (p->pindex < end || end == 0); p = next) { 1993 next = TAILQ_NEXT(p, listq); 1994 1995 /* 1996 * If the page is wired for any reason besides the existence 1997 * of managed, wired mappings, then it cannot be freed. For 1998 * example, fictitious pages, which represent device memory, 1999 * are inherently wired and cannot be freed. They can, 2000 * however, be invalidated if the option OBJPR_CLEANONLY is 2001 * not specified. 2002 */ 2003 vm_page_change_lock(p, &mtx); 2004 if (vm_page_xbusied(p)) { 2005 VM_OBJECT_WUNLOCK(object); 2006 vm_page_busy_sleep(p, "vmopax", true); 2007 VM_OBJECT_WLOCK(object); 2008 goto again; 2009 } 2010 if (p->wire_count != 0) { 2011 if ((options & OBJPR_NOTMAPPED) == 0 && 2012 object->ref_count != 0) 2013 pmap_remove_all(p); 2014 if ((options & OBJPR_CLEANONLY) == 0) { 2015 p->valid = 0; 2016 vm_page_undirty(p); 2017 } 2018 continue; 2019 } 2020 if (vm_page_busied(p)) { 2021 VM_OBJECT_WUNLOCK(object); 2022 vm_page_busy_sleep(p, "vmopar", false); 2023 VM_OBJECT_WLOCK(object); 2024 goto again; 2025 } 2026 KASSERT((p->flags & PG_FICTITIOUS) == 0, 2027 ("vm_object_page_remove: page %p is fictitious", p)); 2028 if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { 2029 if ((options & OBJPR_NOTMAPPED) == 0 && 2030 object->ref_count != 0) 2031 pmap_remove_write(p); 2032 if (p->dirty != 0) 2033 continue; 2034 } 2035 if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) 2036 pmap_remove_all(p); 2037 p->flags &= ~PG_ZERO; 2038 if (vm_page_free_prep(p, false)) 2039 TAILQ_INSERT_TAIL(&pgl, p, listq); 2040 } 2041 if (mtx != NULL) 2042 mtx_unlock(mtx); 2043 vm_page_free_phys_pglist(&pgl); 2044 vm_object_pip_wakeup(object); 2045 } 2046 2047 /* 2048 * vm_object_page_noreuse: 2049 * 2050 * For the given object, attempt to move the specified pages to 2051 * the head of the inactive queue. This bypasses regular LRU 2052 * operation and allows the pages to be reused quickly under memory 2053 * pressure. If a page is wired for any reason, then it will not 2054 * be queued. Pages are specified by the range ["start", "end"). 2055 * As a special case, if "end" is zero, then the range extends from 2056 * "start" to the end of the object. 2057 * 2058 * This operation should only be performed on objects that 2059 * contain non-fictitious, managed pages. 2060 * 2061 * The object must be locked. 2062 */ 2063 void 2064 vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) 2065 { 2066 struct mtx *mtx; 2067 vm_page_t p, next; 2068 2069 VM_OBJECT_ASSERT_LOCKED(object); 2070 KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, 2071 ("vm_object_page_noreuse: illegal object %p", object)); 2072 if (object->resident_page_count == 0) 2073 return; 2074 p = vm_page_find_least(object, start); 2075 2076 /* 2077 * Here, the variable "p" is either (1) the page with the least pindex 2078 * greater than or equal to the parameter "start" or (2) NULL. 2079 */ 2080 mtx = NULL; 2081 for (; p != NULL && (p->pindex < end || end == 0); p = next) { 2082 next = TAILQ_NEXT(p, listq); 2083 vm_page_change_lock(p, &mtx); 2084 vm_page_deactivate_noreuse(p); 2085 } 2086 if (mtx != NULL) 2087 mtx_unlock(mtx); 2088 } 2089 2090 /* 2091 * Populate the specified range of the object with valid pages. Returns 2092 * TRUE if the range is successfully populated and FALSE otherwise. 2093 * 2094 * Note: This function should be optimized to pass a larger array of 2095 * pages to vm_pager_get_pages() before it is applied to a non- 2096 * OBJT_DEVICE object. 2097 * 2098 * The object must be locked. 2099 */ 2100 boolean_t 2101 vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) 2102 { 2103 vm_page_t m; 2104 vm_pindex_t pindex; 2105 int rv; 2106 2107 VM_OBJECT_ASSERT_WLOCKED(object); 2108 for (pindex = start; pindex < end; pindex++) { 2109 m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); 2110 if (m->valid != VM_PAGE_BITS_ALL) { 2111 rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); 2112 if (rv != VM_PAGER_OK) { 2113 vm_page_lock(m); 2114 vm_page_free(m); 2115 vm_page_unlock(m); 2116 break; 2117 } 2118 } 2119 /* 2120 * Keep "m" busy because a subsequent iteration may unlock 2121 * the object. 2122 */ 2123 } 2124 if (pindex > start) { 2125 m = vm_page_lookup(object, start); 2126 while (m != NULL && m->pindex < pindex) { 2127 vm_page_xunbusy(m); 2128 m = TAILQ_NEXT(m, listq); 2129 } 2130 } 2131 return (pindex == end); 2132 } 2133 2134 /* 2135 * Routine: vm_object_coalesce 2136 * Function: Coalesces two objects backing up adjoining 2137 * regions of memory into a single object. 2138 * 2139 * returns TRUE if objects were combined. 2140 * 2141 * NOTE: Only works at the moment if the second object is NULL - 2142 * if it's not, which object do we lock first? 2143 * 2144 * Parameters: 2145 * prev_object First object to coalesce 2146 * prev_offset Offset into prev_object 2147 * prev_size Size of reference to prev_object 2148 * next_size Size of reference to the second object 2149 * reserved Indicator that extension region has 2150 * swap accounted for 2151 * 2152 * Conditions: 2153 * The object must *not* be locked. 2154 */ 2155 boolean_t 2156 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, 2157 vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) 2158 { 2159 vm_pindex_t next_pindex; 2160 2161 if (prev_object == NULL) 2162 return (TRUE); 2163 VM_OBJECT_WLOCK(prev_object); 2164 if ((prev_object->type != OBJT_DEFAULT && 2165 prev_object->type != OBJT_SWAP) || 2166 (prev_object->flags & OBJ_TMPFS_NODE) != 0) { 2167 VM_OBJECT_WUNLOCK(prev_object); 2168 return (FALSE); 2169 } 2170 2171 /* 2172 * Try to collapse the object first 2173 */ 2174 vm_object_collapse(prev_object); 2175 2176 /* 2177 * Can't coalesce if: . more than one reference . paged out . shadows 2178 * another object . has a copy elsewhere (any of which mean that the 2179 * pages not mapped to prev_entry may be in use anyway) 2180 */ 2181 if (prev_object->backing_object != NULL) { 2182 VM_OBJECT_WUNLOCK(prev_object); 2183 return (FALSE); 2184 } 2185 2186 prev_size >>= PAGE_SHIFT; 2187 next_size >>= PAGE_SHIFT; 2188 next_pindex = OFF_TO_IDX(prev_offset) + prev_size; 2189 2190 if ((prev_object->ref_count > 1) && 2191 (prev_object->size != next_pindex)) { 2192 VM_OBJECT_WUNLOCK(prev_object); 2193 return (FALSE); 2194 } 2195 2196 /* 2197 * Account for the charge. 2198 */ 2199 if (prev_object->cred != NULL) { 2200 2201 /* 2202 * If prev_object was charged, then this mapping, 2203 * although not charged now, may become writable 2204 * later. Non-NULL cred in the object would prevent 2205 * swap reservation during enabling of the write 2206 * access, so reserve swap now. Failed reservation 2207 * cause allocation of the separate object for the map 2208 * entry, and swap reservation for this entry is 2209 * managed in appropriate time. 2210 */ 2211 if (!reserved && !swap_reserve_by_cred(ptoa(next_size), 2212 prev_object->cred)) { 2213 VM_OBJECT_WUNLOCK(prev_object); 2214 return (FALSE); 2215 } 2216 prev_object->charge += ptoa(next_size); 2217 } 2218 2219 /* 2220 * Remove any pages that may still be in the object from a previous 2221 * deallocation. 2222 */ 2223 if (next_pindex < prev_object->size) { 2224 vm_object_page_remove(prev_object, next_pindex, next_pindex + 2225 next_size, 0); 2226 if (prev_object->type == OBJT_SWAP) 2227 swap_pager_freespace(prev_object, 2228 next_pindex, next_size); 2229 #if 0 2230 if (prev_object->cred != NULL) { 2231 KASSERT(prev_object->charge >= 2232 ptoa(prev_object->size - next_pindex), 2233 ("object %p overcharged 1 %jx %jx", prev_object, 2234 (uintmax_t)next_pindex, (uintmax_t)next_size)); 2235 prev_object->charge -= ptoa(prev_object->size - 2236 next_pindex); 2237 } 2238 #endif 2239 } 2240 2241 /* 2242 * Extend the object if necessary. 2243 */ 2244 if (next_pindex + next_size > prev_object->size) 2245 prev_object->size = next_pindex + next_size; 2246 2247 VM_OBJECT_WUNLOCK(prev_object); 2248 return (TRUE); 2249 } 2250 2251 void 2252 vm_object_set_writeable_dirty(vm_object_t object) 2253 { 2254 2255 VM_OBJECT_ASSERT_WLOCKED(object); 2256 if (object->type != OBJT_VNODE) { 2257 if ((object->flags & OBJ_TMPFS_NODE) != 0) { 2258 KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs")); 2259 vm_object_set_flag(object, OBJ_TMPFS_DIRTY); 2260 } 2261 return; 2262 } 2263 object->generation++; 2264 if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) 2265 return; 2266 vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); 2267 } 2268 2269 /* 2270 * vm_object_unwire: 2271 * 2272 * For each page offset within the specified range of the given object, 2273 * find the highest-level page in the shadow chain and unwire it. A page 2274 * must exist at every page offset, and the highest-level page must be 2275 * wired. 2276 */ 2277 void 2278 vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, 2279 uint8_t queue) 2280 { 2281 vm_object_t tobject; 2282 vm_page_t m, tm; 2283 vm_pindex_t end_pindex, pindex, tpindex; 2284 int depth, locked_depth; 2285 2286 KASSERT((offset & PAGE_MASK) == 0, 2287 ("vm_object_unwire: offset is not page aligned")); 2288 KASSERT((length & PAGE_MASK) == 0, 2289 ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); 2290 /* The wired count of a fictitious page never changes. */ 2291 if ((object->flags & OBJ_FICTITIOUS) != 0) 2292 return; 2293 pindex = OFF_TO_IDX(offset); 2294 end_pindex = pindex + atop(length); 2295 locked_depth = 1; 2296 VM_OBJECT_RLOCK(object); 2297 m = vm_page_find_least(object, pindex); 2298 while (pindex < end_pindex) { 2299 if (m == NULL || pindex < m->pindex) { 2300 /* 2301 * The first object in the shadow chain doesn't 2302 * contain a page at the current index. Therefore, 2303 * the page must exist in a backing object. 2304 */ 2305 tobject = object; 2306 tpindex = pindex; 2307 depth = 0; 2308 do { 2309 tpindex += 2310 OFF_TO_IDX(tobject->backing_object_offset); 2311 tobject = tobject->backing_object; 2312 KASSERT(tobject != NULL, 2313 ("vm_object_unwire: missing page")); 2314 if ((tobject->flags & OBJ_FICTITIOUS) != 0) 2315 goto next_page; 2316 depth++; 2317 if (depth == locked_depth) { 2318 locked_depth++; 2319 VM_OBJECT_RLOCK(tobject); 2320 } 2321 } while ((tm = vm_page_lookup(tobject, tpindex)) == 2322 NULL); 2323 } else { 2324 tm = m; 2325 m = TAILQ_NEXT(m, listq); 2326 } 2327 vm_page_lock(tm); 2328 vm_page_unwire(tm, queue); 2329 vm_page_unlock(tm); 2330 next_page: 2331 pindex++; 2332 } 2333 /* Release the accumulated object locks. */ 2334 for (depth = 0; depth < locked_depth; depth++) { 2335 tobject = object->backing_object; 2336 VM_OBJECT_RUNLOCK(object); 2337 object = tobject; 2338 } 2339 } 2340 2341 struct vnode * 2342 vm_object_vnode(vm_object_t object) 2343 { 2344 2345 VM_OBJECT_ASSERT_LOCKED(object); 2346 if (object->type == OBJT_VNODE) 2347 return (object->handle); 2348 if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) 2349 return (object->un_pager.swp.swp_tmpfs); 2350 return (NULL); 2351 } 2352 2353 static int 2354 sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) 2355 { 2356 struct kinfo_vmobject *kvo; 2357 char *fullpath, *freepath; 2358 struct vnode *vp; 2359 struct vattr va; 2360 vm_object_t obj; 2361 vm_page_t m; 2362 int count, error; 2363 2364 if (req->oldptr == NULL) { 2365 /* 2366 * If an old buffer has not been provided, generate an 2367 * estimate of the space needed for a subsequent call. 2368 */ 2369 mtx_lock(&vm_object_list_mtx); 2370 count = 0; 2371 TAILQ_FOREACH(obj, &vm_object_list, object_list) { 2372 if (obj->type == OBJT_DEAD) 2373 continue; 2374 count++; 2375 } 2376 mtx_unlock(&vm_object_list_mtx); 2377 return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) * 2378 count * 11 / 10)); 2379 } 2380 2381 kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK); 2382 error = 0; 2383 2384 /* 2385 * VM objects are type stable and are never removed from the 2386 * list once added. This allows us to safely read obj->object_list 2387 * after reacquiring the VM object lock. 2388 */ 2389 mtx_lock(&vm_object_list_mtx); 2390 TAILQ_FOREACH(obj, &vm_object_list, object_list) { 2391 if (obj->type == OBJT_DEAD) 2392 continue; 2393 VM_OBJECT_RLOCK(obj); 2394 if (obj->type == OBJT_DEAD) { 2395 VM_OBJECT_RUNLOCK(obj); 2396 continue; 2397 } 2398 mtx_unlock(&vm_object_list_mtx); 2399 kvo->kvo_size = ptoa(obj->size); 2400 kvo->kvo_resident = obj->resident_page_count; 2401 kvo->kvo_ref_count = obj->ref_count; 2402 kvo->kvo_shadow_count = obj->shadow_count; 2403 kvo->kvo_memattr = obj->memattr; 2404 kvo->kvo_active = 0; 2405 kvo->kvo_inactive = 0; 2406 TAILQ_FOREACH(m, &obj->memq, listq) { 2407 /* 2408 * A page may belong to the object but be 2409 * dequeued and set to PQ_NONE while the 2410 * object lock is not held. This makes the 2411 * reads of m->queue below racy, and we do not 2412 * count pages set to PQ_NONE. However, this 2413 * sysctl is only meant to give an 2414 * approximation of the system anyway. 2415 */ 2416 if (vm_page_active(m)) 2417 kvo->kvo_active++; 2418 else if (vm_page_inactive(m)) 2419 kvo->kvo_inactive++; 2420 } 2421 2422 kvo->kvo_vn_fileid = 0; 2423 kvo->kvo_vn_fsid = 0; 2424 kvo->kvo_vn_fsid_freebsd11 = 0; 2425 freepath = NULL; 2426 fullpath = ""; 2427 vp = NULL; 2428 switch (obj->type) { 2429 case OBJT_DEFAULT: 2430 kvo->kvo_type = KVME_TYPE_DEFAULT; 2431 break; 2432 case OBJT_VNODE: 2433 kvo->kvo_type = KVME_TYPE_VNODE; 2434 vp = obj->handle; 2435 vref(vp); 2436 break; 2437 case OBJT_SWAP: 2438 kvo->kvo_type = KVME_TYPE_SWAP; 2439 break; 2440 case OBJT_DEVICE: 2441 kvo->kvo_type = KVME_TYPE_DEVICE; 2442 break; 2443 case OBJT_PHYS: 2444 kvo->kvo_type = KVME_TYPE_PHYS; 2445 break; 2446 case OBJT_DEAD: 2447 kvo->kvo_type = KVME_TYPE_DEAD; 2448 break; 2449 case OBJT_SG: 2450 kvo->kvo_type = KVME_TYPE_SG; 2451 break; 2452 case OBJT_MGTDEVICE: 2453 kvo->kvo_type = KVME_TYPE_MGTDEVICE; 2454 break; 2455 default: 2456 kvo->kvo_type = KVME_TYPE_UNKNOWN; 2457 break; 2458 } 2459 VM_OBJECT_RUNLOCK(obj); 2460 if (vp != NULL) { 2461 vn_fullpath(curthread, vp, &fullpath, &freepath); 2462 vn_lock(vp, LK_SHARED | LK_RETRY); 2463 if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { 2464 kvo->kvo_vn_fileid = va.va_fileid; 2465 kvo->kvo_vn_fsid = va.va_fsid; 2466 kvo->kvo_vn_fsid_freebsd11 = va.va_fsid; 2467 /* truncate */ 2468 } 2469 vput(vp); 2470 } 2471 2472 strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path)); 2473 if (freepath != NULL) 2474 free(freepath, M_TEMP); 2475 2476 /* Pack record size down */ 2477 kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) 2478 + strlen(kvo->kvo_path) + 1; 2479 kvo->kvo_structsize = roundup(kvo->kvo_structsize, 2480 sizeof(uint64_t)); 2481 error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize); 2482 mtx_lock(&vm_object_list_mtx); 2483 if (error) 2484 break; 2485 } 2486 mtx_unlock(&vm_object_list_mtx); 2487 free(kvo, M_TEMP); 2488 return (error); 2489 } 2490 SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | 2491 CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", 2492 "List of VM objects"); 2493 2494 #include "opt_ddb.h" 2495 #ifdef DDB 2496 #include <sys/kernel.h> 2497 2498 #include <sys/cons.h> 2499 2500 #include <ddb/ddb.h> 2501 2502 static int 2503 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) 2504 { 2505 vm_map_t tmpm; 2506 vm_map_entry_t tmpe; 2507 vm_object_t obj; 2508 int entcount; 2509 2510 if (map == 0) 2511 return 0; 2512 2513 if (entry == 0) { 2514 tmpe = map->header.next; 2515 entcount = map->nentries; 2516 while (entcount-- && (tmpe != &map->header)) { 2517 if (_vm_object_in_map(map, object, tmpe)) { 2518 return 1; 2519 } 2520 tmpe = tmpe->next; 2521 } 2522 } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 2523 tmpm = entry->object.sub_map; 2524 tmpe = tmpm->header.next; 2525 entcount = tmpm->nentries; 2526 while (entcount-- && tmpe != &tmpm->header) { 2527 if (_vm_object_in_map(tmpm, object, tmpe)) { 2528 return 1; 2529 } 2530 tmpe = tmpe->next; 2531 } 2532 } else if ((obj = entry->object.vm_object) != NULL) { 2533 for (; obj; obj = obj->backing_object) 2534 if (obj == object) { 2535 return 1; 2536 } 2537 } 2538 return 0; 2539 } 2540 2541 static int 2542 vm_object_in_map(vm_object_t object) 2543 { 2544 struct proc *p; 2545 2546 /* sx_slock(&allproc_lock); */ 2547 FOREACH_PROC_IN_SYSTEM(p) { 2548 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) 2549 continue; 2550 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { 2551 /* sx_sunlock(&allproc_lock); */ 2552 return 1; 2553 } 2554 } 2555 /* sx_sunlock(&allproc_lock); */ 2556 if (_vm_object_in_map(kernel_map, object, 0)) 2557 return 1; 2558 return 0; 2559 } 2560 2561 DB_SHOW_COMMAND(vmochk, vm_object_check) 2562 { 2563 vm_object_t object; 2564 2565 /* 2566 * make sure that internal objs are in a map somewhere 2567 * and none have zero ref counts. 2568 */ 2569 TAILQ_FOREACH(object, &vm_object_list, object_list) { 2570 if (object->handle == NULL && 2571 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 2572 if (object->ref_count == 0) { 2573 db_printf("vmochk: internal obj has zero ref count: %ld\n", 2574 (long)object->size); 2575 } 2576 if (!vm_object_in_map(object)) { 2577 db_printf( 2578 "vmochk: internal obj is not in a map: " 2579 "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", 2580 object->ref_count, (u_long)object->size, 2581 (u_long)object->size, 2582 (void *)object->backing_object); 2583 } 2584 } 2585 } 2586 } 2587 2588 /* 2589 * vm_object_print: [ debug ] 2590 */ 2591 DB_SHOW_COMMAND(object, vm_object_print_static) 2592 { 2593 /* XXX convert args. */ 2594 vm_object_t object = (vm_object_t)addr; 2595 boolean_t full = have_addr; 2596 2597 vm_page_t p; 2598 2599 /* XXX count is an (unused) arg. Avoid shadowing it. */ 2600 #define count was_count 2601 2602 int count; 2603 2604 if (object == NULL) 2605 return; 2606 2607 db_iprintf( 2608 "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n", 2609 object, (int)object->type, (uintmax_t)object->size, 2610 object->resident_page_count, object->ref_count, object->flags, 2611 object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge); 2612 db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", 2613 object->shadow_count, 2614 object->backing_object ? object->backing_object->ref_count : 0, 2615 object->backing_object, (uintmax_t)object->backing_object_offset); 2616 2617 if (!full) 2618 return; 2619 2620 db_indent += 2; 2621 count = 0; 2622 TAILQ_FOREACH(p, &object->memq, listq) { 2623 if (count == 0) 2624 db_iprintf("memory:="); 2625 else if (count == 6) { 2626 db_printf("\n"); 2627 db_iprintf(" ..."); 2628 count = 0; 2629 } else 2630 db_printf(","); 2631 count++; 2632 2633 db_printf("(off=0x%jx,page=0x%jx)", 2634 (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); 2635 } 2636 if (count != 0) 2637 db_printf("\n"); 2638 db_indent -= 2; 2639 } 2640 2641 /* XXX. */ 2642 #undef count 2643 2644 /* XXX need this non-static entry for calling from vm_map_print. */ 2645 void 2646 vm_object_print( 2647 /* db_expr_t */ long addr, 2648 boolean_t have_addr, 2649 /* db_expr_t */ long count, 2650 char *modif) 2651 { 2652 vm_object_print_static(addr, have_addr, count, modif); 2653 } 2654 2655 DB_SHOW_COMMAND(vmopag, vm_object_print_pages) 2656 { 2657 vm_object_t object; 2658 vm_pindex_t fidx; 2659 vm_paddr_t pa; 2660 vm_page_t m, prev_m; 2661 int rcount, nl, c; 2662 2663 nl = 0; 2664 TAILQ_FOREACH(object, &vm_object_list, object_list) { 2665 db_printf("new object: %p\n", (void *)object); 2666 if (nl > 18) { 2667 c = cngetc(); 2668 if (c != ' ') 2669 return; 2670 nl = 0; 2671 } 2672 nl++; 2673 rcount = 0; 2674 fidx = 0; 2675 pa = -1; 2676 TAILQ_FOREACH(m, &object->memq, listq) { 2677 if (m->pindex > 128) 2678 break; 2679 if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && 2680 prev_m->pindex + 1 != m->pindex) { 2681 if (rcount) { 2682 db_printf(" index(%ld)run(%d)pa(0x%lx)\n", 2683 (long)fidx, rcount, (long)pa); 2684 if (nl > 18) { 2685 c = cngetc(); 2686 if (c != ' ') 2687 return; 2688 nl = 0; 2689 } 2690 nl++; 2691 rcount = 0; 2692 } 2693 } 2694 if (rcount && 2695 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { 2696 ++rcount; 2697 continue; 2698 } 2699 if (rcount) { 2700 db_printf(" index(%ld)run(%d)pa(0x%lx)\n", 2701 (long)fidx, rcount, (long)pa); 2702 if (nl > 18) { 2703 c = cngetc(); 2704 if (c != ' ') 2705 return; 2706 nl = 0; 2707 } 2708 nl++; 2709 } 2710 fidx = m->pindex; 2711 pa = VM_PAGE_TO_PHYS(m); 2712 rcount = 1; 2713 } 2714 if (rcount) { 2715 db_printf(" index(%ld)run(%d)pa(0x%lx)\n", 2716 (long)fidx, rcount, (long)pa); 2717 if (nl > 18) { 2718 c = cngetc(); 2719 if (c != ' ') 2720 return; 2721 nl = 0; 2722 } 2723 nl++; 2724 } 2725 } 2726 } 2727 #endif /* DDB */ 2728