/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	GENERAL RULES ON VM_PAGE MANIPULATION
 *
 *	- A page queue lock is required when adding or removing a page from a
 *	  page queue regardless of other locks or the busy state of a page.
 *
 *		* In general, no thread besides the page daemon can acquire or
 *		  hold more than one page queue lock at a time.
 *
 *		* The page daemon can acquire and hold any pair of page queue
 *		  locks in any order.
 *
 *	- The object lock is required when inserting or removing
 *	  pages from an object (vm_page_insert() or vm_page_remove()).
 *
 */

/*
 *	Resident memory management module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/md_var.h>

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

struct vm_domain vm_dom[MAXMEMDOM];
struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx;

struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];

/*
 * bogus page -- for I/O to/from partially complete buffers,
 * or for paging into sparsely invalid regions.
 */
vm_page_t bogus_page;

vm_page_t vm_page_array;
long vm_page_array_size;
long first_page;

static int boot_pages = UMA_BOOT_PAGES;
SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &boot_pages, 0,
    "number of pages allocated for bootstrapping the VM system");

static int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
    &pa_tryrelock_restart, 0, "Number of tryrelock restarts");

static TAILQ_HEAD(, vm_page) blacklist_head;
static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");

/* Is the page daemon waiting for free pages? */
static int vm_pageout_pages_needed;

static uma_zone_t fakepg_zone;

static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(uint8_t queue, vm_page_t m);
static void vm_page_free_phys(vm_page_t m);
static void vm_page_free_wakeup(void);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
    vm_pindex_t pindex, vm_page_t mpred);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
    vm_page_t mpred);
static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
    vm_paddr_t high);
static int vm_page_alloc_fail(vm_object_t object, int req);

SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);

static void
vm_page_init(void *dummy)
{

	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
	bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
}

/* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
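/*
 * A 32K page holds 32768 / DEV_BSIZE == 64 disk-block-sized chunks, so the
 * per-page valid and dirty masks (vm_page_bits_t) are presumably 64 bits
 * wide in that configuration; the assertion below guards that assumption.
 */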
#if PAGE_SIZE == 32768
#ifdef CTASSERT
CTASSERT(sizeof(u_long) >= 8);
#endif
#endif

/*
 * Try to acquire a physical address lock while a pmap is locked.  If we
 * fail to trylock we unlock and lock the pmap directly and cache the
 * locked pa in *locked.  The caller should then restart their loop in case
 * the virtual to physical mapping has changed.
 */
int
vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
{
	vm_paddr_t lockpa;

	lockpa = *locked;
	*locked = pa;
	if (lockpa) {
		PA_LOCK_ASSERT(lockpa, MA_OWNED);
		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
			return (0);
		PA_UNLOCK(lockpa);
	}
	if (PA_TRYLOCK(pa))
		return (0);
	PMAP_UNLOCK(pmap);
	atomic_add_int(&pa_tryrelock_restart, 1);
	PA_LOCK(pa);
	PMAP_LOCK(pmap);
	return (EAGAIN);
}

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 */
void
vm_set_page_size(void)
{
	if (vm_cnt.v_page_size == 0)
		vm_cnt.v_page_size = PAGE_SIZE;
	if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
}

/*
 *	vm_page_blacklist_next:
 *
 *	Find the next entry in the provided string of blacklist
 *	addresses.  Entries are separated by space, comma, or newline.
 *	If an invalid integer is encountered then the rest of the
 *	string is skipped.  Updates the list pointer to the next
 *	character, or NULL if the string is exhausted or invalid.
 */
static vm_paddr_t
vm_page_blacklist_next(char **list, char *end)
{
	vm_paddr_t bad;
	char *cp, *pos;

	if (list == NULL || *list == NULL)
		return (0);
	if (**list == '\0') {
		*list = NULL;
		return (0);
	}

	/*
	 * If there's no end pointer then the buffer is coming from
	 * the kenv and we know it's null-terminated.
	 */
	if (end == NULL)
		end = *list + strlen(*list);

	/* Ensure that strtoq() won't walk off the end */
	if (*end != '\0') {
		if (*end == '\n' || *end == ' ' || *end == ',')
			*end = '\0';
		else {
			printf("Blacklist not terminated, skipping\n");
			*list = NULL;
			return (0);
		}
	}

	for (pos = *list; *pos != '\0'; pos = cp) {
		bad = strtoq(pos, &cp, 0);
		if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
			if (bad == 0) {
				if (++cp < end)
					continue;
				else
					break;
			}
		} else
			break;
		if (*cp == '\0' || ++cp >= end)
			*list = NULL;
		else
			*list = cp;
		return (trunc_page(bad));
	}
	printf("Garbage in RAM blacklist, skipping\n");
	*list = NULL;
	return (0);
}

/*
 *	vm_page_blacklist_check:
 *
 *	Iterate through the provided string of blacklist addresses, pulling
 *	each entry out of the physical allocator free list and putting it
 *	onto a list for reporting via the vm.page_blacklist sysctl.
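 *
 *	A blacklist is simply physical addresses separated by spaces, commas,
 *	or newlines, e.g. (a hypothetical setting, not taken from this file):
 *
 *		vm.blacklist="0x7f654000,0x7f655000"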
 */
static void
vm_page_blacklist_check(char *list, char *end)
{
	vm_paddr_t pa;
	vm_page_t m;
	char *next;
	int ret;

	next = list;
	while (next != NULL) {
		if ((pa = vm_page_blacklist_next(&next, end)) == 0)
			continue;
		m = vm_phys_paddr_to_vm_page(pa);
		if (m == NULL)
			continue;
		mtx_lock(&vm_page_queue_free_mtx);
		ret = vm_phys_unfree_page(m);
		mtx_unlock(&vm_page_queue_free_mtx);
		if (ret == TRUE) {
			TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
			if (bootverbose)
				printf("Skipping page with pa 0x%jx\n",
				    (uintmax_t)pa);
		}
	}
}

/*
 *	vm_page_blacklist_load:
 *
 *	Search for a special module named "ram_blacklist".  It'll be a
 *	plain text file provided by the user via the loader directive
 *	of the same name.
 */
static void
vm_page_blacklist_load(char **list, char **end)
{
	void *mod;
	u_char *ptr;
	u_int len;

	mod = NULL;
	ptr = NULL;

	mod = preload_search_by_type("ram_blacklist");
	if (mod != NULL) {
		ptr = preload_fetch_addr(mod);
		len = preload_fetch_size(mod);
	}
	*list = ptr;
	if (ptr != NULL)
		*end = ptr + len;
	else
		*end = NULL;
	return;
}

static int
sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
{
	vm_page_t m;
	struct sbuf sbuf;
	int error, first;

	first = 1;
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	TAILQ_FOREACH(m, &blacklist_head, listq) {
		sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
		    (uintmax_t)m->phys_addr);
		first = 0;
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

static void
vm_page_domain_init(struct vm_domain *vmd)
{
	struct vm_pagequeue *pq;
	int i;

	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
	    "vm inactive pagequeue";
	*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
	    &vm_cnt.v_inactive_count;
	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
	    "vm active pagequeue";
	*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
	    &vm_cnt.v_active_count;
	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
	    "vm laundry pagequeue";
	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
	    &vm_cnt.v_laundry_count;
	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
	    "vm unswappable pagequeue";
	/* Unswappable dirty pages are counted as being in the laundry. */
	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) =
	    &vm_cnt.v_laundry_count;
	vmd->vmd_page_count = 0;
	vmd->vmd_free_count = 0;
	vmd->vmd_segs = 0;
	vmd->vmd_oom = FALSE;
	for (i = 0; i < PQ_COUNT; i++) {
		pq = &vmd->vmd_pagequeues[i];
		TAILQ_INIT(&pq->pq_pl);
		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
		    MTX_DEF | MTX_DUPOK);
	}
}

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.  Allocates physical memory for
 *	bootstrapping UMA and some data structures that are used to manage
 *	physical pages.  Initializes these structures, and populates the free
 *	page queues.
 */
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
	struct vm_domain *vmd;
	struct vm_phys_seg *seg;
	vm_page_t m;
	char *list, *listend;
	vm_offset_t mapped;
	vm_paddr_t end, high_avail, low_avail, new_end, page_range, size;
	vm_paddr_t biggestsize, last_pa, pa;
	u_long pagecount;
	int biggestone, i, pages_per_zone, segind;

	biggestsize = 0;
	biggestone = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}
	for (i = 0; phys_avail[i + 1]; i += 2) {
		size = phys_avail[i + 1] - phys_avail[i];
		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
	}

	end = phys_avail[biggestone+1];

	/*
	 * Initialize the page and queue locks.
	 */
	mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
	for (i = 0; i < PA_LOCK_COUNT; i++)
		mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
	for (i = 0; i < vm_ndomains; i++)
		vm_page_domain_init(&vm_dom[i]);

	/*
	 * Almost all of the pages needed for bootstrapping UMA are used
	 * for zone structures, so if the number of CPUs results in those
	 * structures taking more than one page each, we set aside more pages
	 * in proportion to the zone structure size.
	 */
	pages_per_zone = howmany(sizeof(struct uma_zone) +
	    sizeof(struct uma_cache) * (mp_maxid + 1) +
	    roundup2(sizeof(struct uma_slab), sizeof(void *)), UMA_SLAB_SIZE);
	if (pages_per_zone > 1) {
		/* Reserve more pages so that we don't run out. */
		boot_pages = UMA_BOOT_PAGES_ZONES * pages_per_zone;
	}

	/*
	 * Allocate memory for use when boot strapping the kernel memory
	 * allocator.
	 *
	 * CTLFLAG_RDTUN doesn't work during the early boot process, so we
	 * must manually fetch the value.
	 */
	TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages);
	new_end = end - (boot_pages * UMA_SLAB_SIZE);
	new_end = trunc_page(new_end);
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)mapped, end - new_end);
	uma_startup((void *)mapped, boot_pages);

#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
    defined(__i386__) || defined(__mips__)
	/*
	 * Allocate a bitmap to indicate that a random physical page
	 * needs to be included in a minidump.
	 *
	 * The amd64 port needs this to indicate which direct map pages
	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
	 *
	 * However, i386 still needs this workspace internally within the
	 * minidump code.  In theory, they are not needed on i386, but are
	 * included should the sf_buf code decide to use them.
	 */
	last_pa = 0;
	for (i = 0; dump_avail[i + 1] != 0; i += 2)
		if (dump_avail[i + 1] > last_pa)
			last_pa = dump_avail[i + 1];
	page_range = last_pa / PAGE_SIZE;
	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
	new_end -= vm_page_dump_size;
	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)vm_page_dump, vm_page_dump_size);
#else
	(void)last_pa;
#endif
#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__)
	/*
	 * Include the UMA bootstrap pages and vm_page_dump in a crash dump.
	 * When pmap_map() uses the direct map, they are not automatically
	 * included.
	 */
	for (pa = new_end; pa < end; pa += PAGE_SIZE)
		dump_add_page(pa);
#endif
	phys_avail[biggestone + 1] = new_end;
#ifdef __amd64__
	/*
	 * Request that the physical pages underlying the message buffer be
	 * included in a crash dump.  Since the message buffer is accessed
	 * through the direct map, they are not automatically included.
	 */
	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
	last_pa = pa + round_page(msgbufsize);
	while (pa < last_pa) {
		dump_add_page(pa);
		pa += PAGE_SIZE;
	}
#endif
	/*
	 * Compute the number of pages of memory that will be available for
	 * use, taking into account the overhead of a page structure per page.
	 * In other words, solve
	 *	"available physical memory" - round_page(page_range *
	 *	    sizeof(struct vm_page)) = page_range * PAGE_SIZE
	 * for page_range.
	 */
	low_avail = phys_avail[0];
	high_avail = phys_avail[1];
	for (i = 0; i < vm_phys_nsegs; i++) {
		if (vm_phys_segs[i].start < low_avail)
			low_avail = vm_phys_segs[i].start;
		if (vm_phys_segs[i].end > high_avail)
			high_avail = vm_phys_segs[i].end;
	}
	/* Skip the first chunk.  It is already accounted for. */
	for (i = 2; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i] < low_avail)
			low_avail = phys_avail[i];
		if (phys_avail[i + 1] > high_avail)
			high_avail = phys_avail[i + 1];
	}
	first_page = low_avail / PAGE_SIZE;
#ifdef VM_PHYSSEG_SPARSE
	size = 0;
	for (i = 0; i < vm_phys_nsegs; i++)
		size += vm_phys_segs[i].end - vm_phys_segs[i].start;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		size += phys_avail[i + 1] - phys_avail[i];
#elif defined(VM_PHYSSEG_DENSE)
	size = high_avail - low_avail;
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif

#ifdef VM_PHYSSEG_DENSE
	/*
	 * In the VM_PHYSSEG_DENSE case, the number of pages can account for
	 * the overhead of a page structure per page only if vm_page_array is
	 * allocated from the last physical memory chunk.  Otherwise, we must
	 * allocate page structures representing the physical memory
	 * underlying vm_page_array, even though they will not be used.
	 */
	if (new_end != high_avail)
		page_range = size / PAGE_SIZE;
	else
#endif
	{
		page_range = size / (PAGE_SIZE + sizeof(struct vm_page));

		/*
		 * If the partial bytes remaining are large enough for
		 * a page (PAGE_SIZE) without a corresponding
		 * 'struct vm_page', then new_end will contain an
		 * extra page after subtracting the length of the VM
		 * page array.  Compensate by subtracting an extra
		 * page from new_end.
		 */
		if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) {
			if (new_end == high_avail)
				high_avail -= PAGE_SIZE;
			new_end -= PAGE_SIZE;
		}
	}
	end = new_end;

	/*
	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
	 * However, because this page is allocated from KVM, out-of-bounds
	 * accesses using the direct map will not be trapped.
	 */
	vaddr += PAGE_SIZE;

	/*
	 * Allocate physical memory for the page structures, and map it.
	 */
	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_page_array = (vm_page_t)mapped;
	vm_page_array_size = page_range;

#if VM_NRESERVLEVEL > 0
	/*
	 * Allocate physical memory for the reservation management system's
	 * data structures, and map it.
	 */
	if (high_avail == end)
		high_avail = new_end;
	new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
#endif
#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__)
	/*
	 * Include vm_page_array and vm_reserv_array in a crash dump.
	 */
	for (pa = new_end; pa < end; pa += PAGE_SIZE)
		dump_add_page(pa);
#endif
	phys_avail[biggestone + 1] = new_end;

	/*
	 * Add physical memory segments corresponding to the available
	 * physical pages.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);

	/*
	 * Initialize the physical memory allocator.
	 */
	vm_phys_init();

	/*
	 * Initialize the page structures and add every available page to the
	 * physical memory allocator's free lists.
	 */
	vm_cnt.v_page_count = 0;
	vm_cnt.v_free_count = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
			vm_phys_init_page(pa);

		/*
		 * Add the segment to the free lists only if it is covered by
		 * one of the ranges in phys_avail.  Because we've added the
		 * ranges to the vm_phys_segs array, we can assume that each
		 * segment is either entirely contained in one of the ranges,
		 * or doesn't overlap any of them.
		 */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			if (seg->start < phys_avail[i] ||
			    seg->end > phys_avail[i + 1])
				continue;

			m = seg->first_page;
			pagecount = (u_long)atop(seg->end - seg->start);

			mtx_lock(&vm_page_queue_free_mtx);
			vm_phys_free_contig(m, pagecount);
			vm_phys_freecnt_adj(m, (int)pagecount);
			mtx_unlock(&vm_page_queue_free_mtx);
			vm_cnt.v_page_count += (u_int)pagecount;

			vmd = &vm_dom[seg->domain];
			vmd->vmd_page_count += (u_int)pagecount;
			vmd->vmd_segs |= 1UL << m->segind;
			break;
		}
	}

	/*
	 * Remove blacklisted pages from the physical memory allocator.
	 */
	TAILQ_INIT(&blacklist_head);
	vm_page_blacklist_load(&list, &listend);
	vm_page_blacklist_check(list, listend);

	list = kern_getenv("vm.blacklist");
	vm_page_blacklist_check(list, NULL);

	freeenv(list);
#if VM_NRESERVLEVEL > 0
	/*
	 * Initialize the reservation management system.
	 */
	vm_reserv_init();
#endif
	return (vaddr);
}

void
vm_page_reference(vm_page_t m)
{

	vm_page_aflag_set(m, PGA_REFERENCED);
}

/*
 *	vm_page_busy_downgrade:
 *
 *	Downgrade an exclusive busy page into a single shared busy page.
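 *
 *	A sketch of typical use (an assumed caller, not taken from this
 *	file): a thread that xbusied a page for setup but afterwards only
 *	needs to read it can downgrade rather than drop and re-acquire the
 *	busy lock:
 *
 *		vm_page_busy_downgrade(m);
 *		... access the page while shared busied ...
 *		vm_page_sunbusy(m);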
 */
void
vm_page_busy_downgrade(vm_page_t m)
{
	u_int x;
	bool locked;

	vm_page_assert_xbusied(m);
	locked = mtx_owned(vm_page_lockptr(m));

	for (;;) {
		x = m->busy_lock;
		x &= VPB_BIT_WAITERS;
		if (x != 0 && !locked)
			vm_page_lock(m);
		if (atomic_cmpset_rel_int(&m->busy_lock,
		    VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1)))
			break;
		if (x != 0 && !locked)
			vm_page_unlock(m);
	}
	if (x != 0) {
		wakeup(m);
		if (!locked)
			vm_page_unlock(m);
	}
}

/*
 *	vm_page_sbusied:
 *
 *	Return a positive value if the page is shared busied, 0 otherwise.
 */
int
vm_page_sbusied(vm_page_t m)
{
	u_int x;

	x = m->busy_lock;
	return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED);
}

/*
 *	vm_page_sunbusy:
 *
 *	Shared unbusy a page.
 */
void
vm_page_sunbusy(vm_page_t m)
{
	u_int x;

	vm_page_lock_assert(m, MA_NOTOWNED);
	vm_page_assert_sbusied(m);

	for (;;) {
		x = m->busy_lock;
		if (VPB_SHARERS(x) > 1) {
			if (atomic_cmpset_int(&m->busy_lock, x,
			    x - VPB_ONE_SHARER))
				break;
			continue;
		}
		if ((x & VPB_BIT_WAITERS) == 0) {
			KASSERT(x == VPB_SHARERS_WORD(1),
			    ("vm_page_sunbusy: invalid lock state"));
			if (atomic_cmpset_int(&m->busy_lock,
			    VPB_SHARERS_WORD(1), VPB_UNBUSIED))
				break;
			continue;
		}
		KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
		    ("vm_page_sunbusy: invalid lock state for waiters"));

		vm_page_lock(m);
		if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
			vm_page_unlock(m);
			continue;
		}
		wakeup(m);
		vm_page_unlock(m);
		break;
	}
}

/*
 *	vm_page_busy_sleep:
 *
 *	Sleep and release the page lock, using the page pointer as wchan.
 *	This is used to implement the hard path of the busying mechanism.
 *
 *	The given page must be locked.
 *
 *	If nonshared is true, sleep only if the page is xbusy.
 */
void
vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
{
	u_int x;

	vm_page_assert_locked(m);

	x = m->busy_lock;
	if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
	    ((x & VPB_BIT_WAITERS) == 0 &&
	    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
		vm_page_unlock(m);
		return;
	}
	msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
}

/*
 *	vm_page_trysbusy:
 *
 *	Try to shared busy a page.
 *	If the operation succeeds, 1 is returned; otherwise 0.
 *	The operation never sleeps.
 */
int
vm_page_trysbusy(vm_page_t m)
{
	u_int x;

	for (;;) {
		x = m->busy_lock;
		if ((x & VPB_BIT_SHARED) == 0)
			return (0);
		if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER))
			return (1);
	}
}

static void
vm_page_xunbusy_locked(vm_page_t m)
{

	vm_page_assert_xbusied(m);
	vm_page_assert_locked(m);

	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
	/* There is a waiter, do wakeup() instead of vm_page_flash(). */
	wakeup(m);
}

void
vm_page_xunbusy_maybelocked(vm_page_t m)
{
	bool lockacq;

	vm_page_assert_xbusied(m);

	/*
	 * Fast path for unbusy.  If it succeeds, we know that there
	 * are no waiters, so we do not need a wakeup.
	 */
	if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER,
	    VPB_UNBUSIED))
		return;

	lockacq = !mtx_owned(vm_page_lockptr(m));
	if (lockacq)
		vm_page_lock(m);
	vm_page_xunbusy_locked(m);
	if (lockacq)
		vm_page_unlock(m);
}

/*
 *	vm_page_xunbusy_hard:
 *
 *	Called after the first attempt to exclusively unbusy a page has
 *	failed.  It is assumed that the waiters bit is on.
 */
void
vm_page_xunbusy_hard(vm_page_t m)
{

	vm_page_assert_xbusied(m);

	vm_page_lock(m);
	vm_page_xunbusy_locked(m);
	vm_page_unlock(m);
}

/*
 *	vm_page_flash:
 *
 *	Wakeup anyone waiting for the page.
 *	The ownership bits do not change.
 *
 *	The given page must be locked.
 */
void
vm_page_flash(vm_page_t m)
{
	u_int x;

	vm_page_lock_assert(m, MA_OWNED);

	for (;;) {
		x = m->busy_lock;
		if ((x & VPB_BIT_WAITERS) == 0)
			return;
		if (atomic_cmpset_int(&m->busy_lock, x,
		    x & (~VPB_BIT_WAITERS)))
			break;
	}
	wakeup(m);
}

/*
 * Avoid releasing and reacquiring the same page lock.
 */
void
vm_page_change_lock(vm_page_t m, struct mtx **mtx)
{
	struct mtx *mtx1;

	mtx1 = vm_page_lockptr(m);
	if (*mtx == mtx1)
		return;
	if (*mtx != NULL)
		mtx_unlock(*mtx);
	*mtx = mtx1;
	mtx_lock(mtx1);
}

/*
 * Keep a page from being freed by the page daemon.  This has much the
 * same effect as wiring, except with much lower overhead, and should be
 * used only for *very* temporary holding ("wiring").
 */
void
vm_page_hold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	mem->hold_count++;
}

void
vm_page_unhold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!"));
	--mem->hold_count;
	if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0)
		vm_page_free_toq(mem);
}

/*
 *	vm_page_unhold_pages:
 *
 *	Unhold each of the pages that is referenced by the given array.
 */
void
vm_page_unhold_pages(vm_page_t *ma, int count)
{
	struct mtx *mtx;

	mtx = NULL;
	for (; count != 0; count--) {
		vm_page_change_lock(*ma, &mtx);
		vm_page_unhold(*ma);
		ma++;
	}
	if (mtx != NULL)
		mtx_unlock(mtx);
}

vm_page_t
PHYS_TO_VM_PAGE(vm_paddr_t pa)
{
	vm_page_t m;

#ifdef VM_PHYSSEG_SPARSE
	m = vm_phys_paddr_to_vm_page(pa);
	if (m == NULL)
		m = vm_phys_fictitious_to_vm_page(pa);
	return (m);
#elif defined(VM_PHYSSEG_DENSE)
	long pi;

	pi = atop(pa);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		m = &vm_page_array[pi - first_page];
		return (m);
	}
	return (vm_phys_fictitious_to_vm_page(pa));
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
}

/*
 *	vm_page_getfake:
 *
 *	Create a fictitious page with the specified physical address and
 *	memory attribute.  The memory attribute is the only machine-
 *	dependent aspect of a fictitious page that must be initialized.
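 *
 *	A usage sketch (an assumed caller, not taken from this file): code
 *	that needs a vm_page backing a device memory address would typically
 *	pair this with vm_page_putfake():
 *
 *		m = vm_page_getfake(paddr, memattr);
 *		... hand m to code that expects a vm_page ...
 *		vm_page_putfake(m);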
 */
vm_page_t
vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
{
	vm_page_t m;

	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
	vm_page_initfake(m, paddr, memattr);
	return (m);
}

void
vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	if ((m->flags & PG_FICTITIOUS) != 0) {
		/*
		 * The page's memattr might have changed since the
		 * previous initialization.  Update the pmap to the
		 * new memattr.
		 */
		goto memattr;
	}
	m->phys_addr = paddr;
	m->queue = PQ_NONE;
	/* Fictitious pages don't use "segind". */
	m->flags = PG_FICTITIOUS;
	/* Fictitious pages don't use "order" or "pool". */
	m->oflags = VPO_UNMANAGED;
	m->busy_lock = VPB_SINGLE_EXCLUSIVER;
	m->wire_count = 1;
	pmap_page_init(m);
memattr:
	pmap_page_set_memattr(m, memattr);
}

/*
 *	vm_page_putfake:
 *
 *	Release a fictitious page.
 */
void
vm_page_putfake(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_putfake: bad page %p", m));
	uma_zfree(fakepg_zone, m);
}

/*
 *	vm_page_updatefake:
 *
 *	Update the given fictitious page to the specified physical address and
 *	memory attribute.
 */
void
vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_updatefake: bad page %p", m));
	m->phys_addr = paddr;
	pmap_page_set_memattr(m, memattr);
}

/*
 *	vm_page_free:
 *
 *	Free a page.
 */
void
vm_page_free(vm_page_t m)
{

	m->flags &= ~PG_ZERO;
	vm_page_free_toq(m);
}

/*
 *	vm_page_free_zero:
 *
 *	Free a page to the zeroed-pages queue.
 */
void
vm_page_free_zero(vm_page_t m)
{

	m->flags |= PG_ZERO;
	vm_page_free_toq(m);
}

/*
 * Unbusy and handle the page queueing for a page from a getpages request that
 * was optionally read ahead or behind.
 */
void
vm_page_readahead_finish(vm_page_t m)
{

	/* We shouldn't put invalid pages on queues. */
	KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m));

	/*
	 * Since the page is not the actually needed one, whether it should
	 * be activated or deactivated is not obvious.  Empirical results
	 * have shown that deactivating the page is usually the best choice,
	 * unless the page is wanted by another thread.
	 */
	vm_page_lock(m);
	if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
		vm_page_activate(m);
	else
		vm_page_deactivate(m);
	vm_page_unlock(m);
	vm_page_xunbusy(m);
}

/*
 *	vm_page_sleep_if_busy:
 *
 *	Sleep and release the page queues lock if the page is busied.
 *	Returns TRUE if the thread slept.
 *
 *	The given page must be unlocked and the object containing it must
 *	be locked.
 */
int
vm_page_sleep_if_busy(vm_page_t m, const char *msg)
{
	vm_object_t obj;

	vm_page_lock_assert(m, MA_NOTOWNED);
	VM_OBJECT_ASSERT_WLOCKED(m->object);

	if (vm_page_busied(m)) {
		/*
		 * The page-specific object must be cached because page
		 * identity can change during the sleep, causing the
		 * re-lock of a different object.
		 * It is assumed that a reference to the object is already
		 * held by the caller.
		 */
		obj = m->object;
		vm_page_lock(m);
		VM_OBJECT_WUNLOCK(obj);
		vm_page_busy_sleep(m, msg, false);
		VM_OBJECT_WLOCK(obj);
		return (TRUE);
	}
	return (FALSE);
}

/*
 *	vm_page_dirty_KBI:		[ internal use only ]
 *
 *	Set all bits in the page's dirty field.
 *
 *	The object containing the specified page must be locked if the
 *	call is made from the machine-independent layer.
 *
 *	See vm_page_clear_dirty_mask().
 *
 *	This function should only be called by vm_page_dirty().
 */
void
vm_page_dirty_KBI(vm_page_t m)
{

	/* Refer to this operation by its public name. */
	KASSERT(m->valid == VM_PAGE_BITS_ALL,
	    ("vm_page_dirty: page is invalid!"));
	m->dirty = VM_PAGE_BITS_ALL;
}

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object and object list.
 *
 *	The object must be locked.
 */
int
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t mpred;

	VM_OBJECT_ASSERT_WLOCKED(object);
	mpred = vm_radix_lookup_le(&object->rtree, pindex);
	return (vm_page_insert_after(m, object, pindex, mpred));
}

/*
 *	vm_page_insert_after:
 *
 *	Inserts the page "m" into the specified object at offset "pindex".
 *
 *	The page "mpred" must immediately precede the offset "pindex" within
 *	the specified object.
 *
 *	The object must be locked.
 */
static int
vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
    vm_page_t mpred)
{
	vm_page_t msucc;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(m->object == NULL,
	    ("vm_page_insert_after: page already inserted"));
	if (mpred != NULL) {
		KASSERT(mpred->object == object,
		    ("vm_page_insert_after: object doesn't contain mpred"));
		KASSERT(mpred->pindex < pindex,
		    ("vm_page_insert_after: mpred doesn't precede pindex"));
		msucc = TAILQ_NEXT(mpred, listq);
	} else
		msucc = TAILQ_FIRST(&object->memq);
	if (msucc != NULL)
		KASSERT(msucc->pindex > pindex,
		    ("vm_page_insert_after: msucc doesn't succeed pindex"));

	/*
	 * Record the object/offset pair in this page.
	 */
	m->object = object;
	m->pindex = pindex;

	/*
	 * Now link into the object's ordered list of backed pages.
	 */
	if (vm_radix_insert(&object->rtree, m)) {
		m->object = NULL;
		m->pindex = 0;
		return (1);
	}
	vm_page_insert_radixdone(m, object, mpred);
	return (0);
}

/*
 *	vm_page_insert_radixdone:
 *
 *	Complete page "m" insertion into the specified object after the
 *	radix trie hooking.
 *
 *	The page "mpred" must precede the offset "m->pindex" within the
 *	specified object.
 *
 *	The object must be locked.
 */
static void
vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object != NULL && m->object == object,
	    ("vm_page_insert_radixdone: page %p has inconsistent object", m));
	if (mpred != NULL) {
		KASSERT(mpred->object == object,
		    ("vm_page_insert_after: object doesn't contain mpred"));
		KASSERT(mpred->pindex < m->pindex,
		    ("vm_page_insert_after: mpred doesn't precede pindex"));
	}

	if (mpred != NULL)
		TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq);
	else
		TAILQ_INSERT_HEAD(&object->memq, m, listq);

	/*
	 * Show that the object has one more resident page.
	 */
	object->resident_page_count++;

	/*
	 * Hold the vnode until the last page is released.
	 */
	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
		vhold(object->handle);

	/*
	 * Since we are inserting a new and possibly dirty page,
	 * update the object's OBJ_MIGHTBEDIRTY flag.
	 */
	if (pmap_page_is_write_mapped(m))
		vm_object_set_writeable_dirty(object);
}

/*
 *	vm_page_remove:
 *
 *	Removes the specified page from its containing object, but does not
 *	invalidate any backing storage.
 *
 *	The object must be locked.  The page must be locked if it is managed.
 */
void
vm_page_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t mrem;

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_assert_locked(m);
	if ((object = m->object) == NULL)
		return;
	VM_OBJECT_ASSERT_WLOCKED(object);
	if (vm_page_xbusied(m))
		vm_page_xunbusy_maybelocked(m);
	mrem = vm_radix_remove(&object->rtree, m->pindex);
	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));

	/*
	 * Now remove from the object's list of backed pages.
	 */
	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */
	object->resident_page_count--;

	/*
	 * The vnode may now be recycled.
	 */
	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
		vdrop(object->handle);

	m->object = NULL;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.
 */
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{

	VM_OBJECT_ASSERT_LOCKED(object);
	return (vm_radix_lookup(&object->rtree, pindex));
}

/*
 *	vm_page_find_least:
 *
 *	Returns the page associated with the object with least pindex
 *	greater than or equal to the parameter pindex, or NULL.
 *
 *	The object must be locked.
 */
vm_page_t
vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_ASSERT_LOCKED(object);
	if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex)
		m = vm_radix_lookup_ge(&object->rtree, pindex);
	return (m);
}

/*
 * Returns the given page's successor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_next(vm_page_t m)
{
	vm_page_t next;

	VM_OBJECT_ASSERT_LOCKED(m->object);
	if ((next = TAILQ_NEXT(m, listq)) != NULL) {
		MPASS(next->object == m->object);
		if (next->pindex != m->pindex + 1)
			next = NULL;
	}
	return (next);
}

/*
 * Returns the given page's predecessor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_prev(vm_page_t m)
{
	vm_page_t prev;

	VM_OBJECT_ASSERT_LOCKED(m->object);
	if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) {
		MPASS(prev->object == m->object);
		if (prev->pindex != m->pindex - 1)
			prev = NULL;
	}
	return (prev);
}

/*
 * Uses the page mnew as a replacement for an existing page at index
 * pindex which must be already present in the object.
 *
 * The existing page must not be on a paging queue.
 */
vm_page_t
vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t mold;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(mnew->object == NULL,
	    ("vm_page_replace: page already in object"));

	/*
	 * This function mostly follows vm_page_insert() and
	 * vm_page_remove() without the radix, object count and vnode
	 * dance.  Double check such functions for more comments.
	 */

	mnew->object = object;
	mnew->pindex = pindex;
	mold = vm_radix_replace(&object->rtree, mnew);
	KASSERT(mold->queue == PQ_NONE,
	    ("vm_page_replace: mold is on a paging queue"));

	/* Keep the resident page list in sorted order. */
	TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq);
	TAILQ_REMOVE(&object->memq, mold, listq);

	mold->object = NULL;
	vm_page_xunbusy_maybelocked(mold);

	/*
	 * The object's resident_page_count does not change because we have
	 * swapped one page for another, but OBJ_MIGHTBEDIRTY may need to be
	 * updated.
	 */
	if (pmap_page_is_write_mapped(mnew))
		vm_object_set_writeable_dirty(object);
	return (mold);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	Note: swap associated with the page must be invalidated by the move.
 *	We have to do this for several reasons: (1) we aren't freeing the
 *	page, (2) we are dirtying the page, (3) the VM system is probably
 *	moving the page from object A to B, and will then later move
 *	the backing store from A to B and we can't have a conflict.
 *
 *	Note: we *always* dirty the page.  It is necessary both for the
 *	fact that we moved it, and because we may be invalidating
 *	swap.
 *
 *	The objects must be locked.
 */
int
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{
	vm_page_t mpred;
	vm_pindex_t opidx;

	VM_OBJECT_ASSERT_WLOCKED(new_object);

	mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex);
	KASSERT(mpred == NULL || mpred->pindex != new_pindex,
	    ("vm_page_rename: pindex already renamed"));

	/*
	 * Create a custom version of vm_page_insert() which does not depend
	 * on m_prev and can cheat on the implementation aspects of the
	 * function.
	 */
	opidx = m->pindex;
	m->pindex = new_pindex;
	if (vm_radix_insert(&new_object->rtree, m)) {
		m->pindex = opidx;
		return (1);
	}

	/*
	 * The operation cannot fail anymore.  The removal must happen before
	 * the listq iterator is tainted.
	 */
	m->pindex = opidx;
	vm_page_lock(m);
	vm_page_remove(m);

	/* Return back to the new pindex to complete vm_page_insert(). */
	m->pindex = new_pindex;
	m->object = new_object;
	vm_page_unlock(m);
	vm_page_insert_radixdone(m, new_object, mpred);
	vm_page_dirty(m);
	return (0);
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a page that is associated with the specified
 *	object and offset pair.  By default, this page is exclusive busied.
 *
 *	The caller must always specify an allocation class.
 *
 *	allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 *	optional allocation flags:
 *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
 *				intends to allocate
 *	VM_ALLOC_NOBUSY		do not exclusive busy the page
 *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
 *	VM_ALLOC_NOOBJ		page is not associated with an object and
 *				should not be exclusive busy
 *	VM_ALLOC_SBUSY		shared busy the allocated page
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *
 *	This routine may not sleep.
 */
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{

	return (vm_page_alloc_after(object, pindex, req, object != NULL ?
	    vm_radix_lookup_le(&object->rtree, pindex) : NULL));
}

/*
 * Allocate a page in the specified object with the given page index.  To
 * optimize insertion of the page into the object, the caller must also
 * specify the resident page in the object with the largest index smaller
 * than the given page index, or NULL if no such page exists.
 */
vm_page_t
vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
    vm_page_t mpred)
{
	vm_page_t m;
	int flags, req_class;
	u_int free_count;

	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
	    ("inconsistent object(%p)/req(%x)", object, req));
	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
	    ("Can't sleep and retry object insertion."));
	KASSERT(mpred == NULL || mpred->pindex < pindex,
	    ("mpred %p doesn't precede pindex 0x%jx", mpred,
	    (uintmax_t)pindex));
	if (object != NULL)
		VM_OBJECT_ASSERT_WLOCKED(object);

	req_class = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The page daemon is allowed to dig deeper into the free page list.
	 */
	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
		req_class = VM_ALLOC_SYSTEM;

	/*
	 * Allocate a page if the number of free pages exceeds the minimum
	 * for the request class.
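	 *
	 * As a summary of the check below:
	 *	VM_ALLOC_NORMAL		needs v_free_count > v_free_reserved,
	 *	VM_ALLOC_SYSTEM		needs v_free_count > v_interrupt_free_min,
	 *	VM_ALLOC_INTERRUPT	only needs v_free_count > 0.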
	 */
again:
	mtx_lock(&vm_page_queue_free_mtx);
	if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
	    (req_class == VM_ALLOC_SYSTEM &&
	    vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
	    (req_class == VM_ALLOC_INTERRUPT &&
	    vm_cnt.v_free_count > 0)) {
		/*
		 * Can we allocate the page from a reservation?
		 */
#if VM_NRESERVLEVEL > 0
		if (object == NULL || (object->flags & (OBJ_COLORED |
		    OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
		    vm_reserv_alloc_page(object, pindex, mpred)) == NULL)
#endif
		{
			/*
			 * If not, allocate it from the free page queues.
			 */
			m = vm_phys_alloc_pages(object != NULL ?
			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
			if (m == NULL && vm_reserv_reclaim_inactive()) {
				m = vm_phys_alloc_pages(object != NULL ?
				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
				    0);
			}
#endif
		}
	} else {
		/*
		 * Not allocatable, give up.
		 */
		if (vm_page_alloc_fail(object, req))
			goto again;
		return (NULL);
	}

	/*
	 * At this point we had better have found a good page.
	 */
	KASSERT(m != NULL, ("missing page"));
	free_count = vm_phys_freecnt_adj(m, -1);
	mtx_unlock(&vm_page_queue_free_mtx);
	vm_page_alloc_check(m);

	/*
	 * Initialize the page.  Only the PG_ZERO flag is inherited.
	 */
	flags = 0;
	if ((req & VM_ALLOC_ZERO) != 0)
		flags = PG_ZERO;
	flags &= m->flags;
	if ((req & VM_ALLOC_NODUMP) != 0)
		flags |= PG_NODUMP;
	m->flags = flags;
	m->aflags = 0;
	m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
	    VPO_UNMANAGED : 0;
	m->busy_lock = VPB_UNBUSIED;
	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
		m->busy_lock = VPB_SINGLE_EXCLUSIVER;
	if ((req & VM_ALLOC_SBUSY) != 0)
		m->busy_lock = VPB_SHARERS_WORD(1);
	if (req & VM_ALLOC_WIRED) {
		/*
		 * The page lock is not required for wiring a page until that
		 * page is inserted into the object.
		 */
		atomic_add_int(&vm_cnt.v_wire_count, 1);
		m->wire_count = 1;
	}
	m->act_count = 0;

	if (object != NULL) {
		if (vm_page_insert_after(m, object, pindex, mpred)) {
			pagedaemon_wakeup();
			if (req & VM_ALLOC_WIRED) {
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				m->wire_count = 0;
			}
			KASSERT(m->object == NULL, ("page %p has object", m));
			m->oflags = VPO_UNMANAGED;
			m->busy_lock = VPB_UNBUSIED;
			/* Don't change PG_ZERO. */
			vm_page_free_toq(m);
			if (req & VM_ALLOC_WAITFAIL) {
				VM_OBJECT_WUNLOCK(object);
				vm_radix_wait();
				VM_OBJECT_WLOCK(object);
			}
			return (NULL);
		}

		/* Ignore device objects; the pager sets "memattr" for them. */
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    (object->flags & OBJ_FICTITIOUS) == 0)
			pmap_page_set_memattr(m, object->memattr);
	} else
		m->pindex = pindex;

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (vm_paging_needed(free_count))
		pagedaemon_wakeup();

	return (m);
}

/*
 *	vm_page_alloc_contig:
 *
 *	Allocate a contiguous set of physical pages of the given size "npages"
 *	from the free lists.  All of the physical pages must be at or above
 *	the given physical address "low" and below the given physical address
 *	"high".  The given value "alignment" determines the alignment of the
 *	first physical page in the set.  If the given value "boundary" is
 *	non-zero, then the set of physical pages cannot cross any physical
 *	address boundary that is a multiple of that value.  Both "alignment"
 *	and "boundary" must be a power of two.
 *
 *	If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
 *	then the memory attribute setting for the physical pages is configured
 *	to the object's memory attribute setting.  Otherwise, the memory
 *	attribute setting for the physical pages is configured to "memattr",
 *	overriding the object's memory attribute setting.  However, if the
 *	object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
 *	memory attribute setting for the physical pages cannot be configured
 *	to VM_MEMATTR_DEFAULT.
 *
 *	The specified object may not contain fictitious pages.
 *
 *	The caller must always specify an allocation class.
 *
 *	allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 *	optional allocation flags:
 *	VM_ALLOC_NOBUSY		do not exclusive busy the page
 *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
 *	VM_ALLOC_NOOBJ		page is not associated with an object and
 *				should not be exclusive busy
 *	VM_ALLOC_SBUSY		shared busy the allocated page
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *
 *	This routine may not sleep.
 */
vm_page_t
vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary, vm_memattr_t memattr)
{
	vm_page_t m, m_ret, mpred;
	u_int busy_lock, flags, oflags;
	int req_class;

	mpred = NULL;	/* XXX: pacify gcc */
	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
	    ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object,
	    req));
	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
	    ("Can't sleep and retry object insertion."));
	if (object != NULL) {
		VM_OBJECT_ASSERT_WLOCKED(object);
		KASSERT((object->flags & OBJ_FICTITIOUS) == 0,
		    ("vm_page_alloc_contig: object %p has fictitious pages",
		    object));
	}
	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
	req_class = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The page daemon is allowed to dig deeper into the free page list.
	 */
	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
		req_class = VM_ALLOC_SYSTEM;

	if (object != NULL) {
		mpred = vm_radix_lookup_le(&object->rtree, pindex);
		KASSERT(mpred == NULL || mpred->pindex != pindex,
		    ("vm_page_alloc_contig: pindex already allocated"));
	}

	/*
	 * Can we allocate the pages without the number of free pages falling
	 * below the lower bound for the allocation class?
	 */
again:
	mtx_lock(&vm_page_queue_free_mtx);
	if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
	    (req_class == VM_ALLOC_SYSTEM &&
	    vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) ||
	    (req_class == VM_ALLOC_INTERRUPT &&
	    vm_cnt.v_free_count >= npages)) {
		/*
		 * Can we allocate the pages from a reservation?
		 */
#if VM_NRESERVLEVEL > 0
retry:
		if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
		    (m_ret = vm_reserv_alloc_contig(object, pindex, npages,
		    low, high, alignment, boundary, mpred)) == NULL)
#endif
			/*
			 * If not, allocate them from the free page queues.
			 */
			m_ret = vm_phys_alloc_contig(npages, low, high,
			    alignment, boundary);
	} else {
		if (vm_page_alloc_fail(object, req))
			goto again;
		return (NULL);
	}
	if (m_ret != NULL)
		vm_phys_freecnt_adj(m_ret, -npages);
	else {
#if VM_NRESERVLEVEL > 0
		if (vm_reserv_reclaim_contig(npages, low, high, alignment,
		    boundary))
			goto retry;
#endif
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	if (m_ret == NULL)
		return (NULL);
	for (m = m_ret; m < &m_ret[npages]; m++)
		vm_page_alloc_check(m);

	/*
	 * Initialize the pages.  Only the PG_ZERO flag is inherited.
	 */
	flags = 0;
	if ((req & VM_ALLOC_ZERO) != 0)
		flags = PG_ZERO;
	if ((req & VM_ALLOC_NODUMP) != 0)
		flags |= PG_NODUMP;
	oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
	    VPO_UNMANAGED : 0;
	busy_lock = VPB_UNBUSIED;
	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
		busy_lock = VPB_SINGLE_EXCLUSIVER;
	if ((req & VM_ALLOC_SBUSY) != 0)
		busy_lock = VPB_SHARERS_WORD(1);
	if ((req & VM_ALLOC_WIRED) != 0)
		atomic_add_int(&vm_cnt.v_wire_count, npages);
	if (object != NULL) {
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    memattr == VM_MEMATTR_DEFAULT)
			memattr = object->memattr;
	}
	for (m = m_ret; m < &m_ret[npages]; m++) {
		m->aflags = 0;
		m->flags = (m->flags | PG_NODUMP) & flags;
		m->busy_lock = busy_lock;
		if ((req & VM_ALLOC_WIRED) != 0)
			m->wire_count = 1;
		m->act_count = 0;
		m->oflags = oflags;
		if (object != NULL) {
			if (vm_page_insert_after(m, object, pindex, mpred)) {
				pagedaemon_wakeup();
				if ((req & VM_ALLOC_WIRED) != 0)
					atomic_subtract_int(
					    &vm_cnt.v_wire_count, npages);
				KASSERT(m->object == NULL,
				    ("page %p has object", m));
				mpred = m;
				for (m = m_ret; m < &m_ret[npages]; m++) {
					if (m <= mpred &&
					    (req & VM_ALLOC_WIRED) != 0)
						m->wire_count = 0;
					m->oflags = VPO_UNMANAGED;
					m->busy_lock = VPB_UNBUSIED;
					/* Don't change PG_ZERO. */
					vm_page_free_toq(m);
				}
				if (req & VM_ALLOC_WAITFAIL) {
					VM_OBJECT_WUNLOCK(object);
					vm_radix_wait();
					VM_OBJECT_WLOCK(object);
				}
				return (NULL);
			}
			mpred = m;
		} else
			m->pindex = pindex;
		if (memattr != VM_MEMATTR_DEFAULT)
			pmap_page_set_memattr(m, memattr);
		pindex++;
	}
	if (vm_paging_needed(vm_cnt.v_free_count))
		pagedaemon_wakeup();
	return (m_ret);
}

/*
 * Check a page that has been freshly dequeued from a freelist.
1924 */ 1925 static void 1926 vm_page_alloc_check(vm_page_t m) 1927 { 1928 1929 KASSERT(m->object == NULL, ("page %p has object", m)); 1930 KASSERT(m->queue == PQ_NONE, 1931 ("page %p has unexpected queue %d", m, m->queue)); 1932 KASSERT(m->wire_count == 0, ("page %p is wired", m)); 1933 KASSERT(m->hold_count == 0, ("page %p is held", m)); 1934 KASSERT(!vm_page_busied(m), ("page %p is busy", m)); 1935 KASSERT(m->dirty == 0, ("page %p is dirty", m)); 1936 KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, 1937 ("page %p has unexpected memattr %d", 1938 m, pmap_page_get_memattr(m))); 1939 KASSERT(m->valid == 0, ("free page %p is valid", m)); 1940 } 1941 1942 /* 1943 * vm_page_alloc_freelist: 1944 * 1945 * Allocate a physical page from the specified free page list. 1946 * 1947 * The caller must always specify an allocation class. 1948 * 1949 * allocation classes: 1950 * VM_ALLOC_NORMAL normal process request 1951 * VM_ALLOC_SYSTEM system *really* needs a page 1952 * VM_ALLOC_INTERRUPT interrupt time request 1953 * 1954 * optional allocation flags: 1955 * VM_ALLOC_COUNT(number) the number of additional pages that the caller 1956 * intends to allocate 1957 * VM_ALLOC_WIRED wire the allocated page 1958 * VM_ALLOC_ZERO prefer a zeroed page 1959 * 1960 * This routine may not sleep. 1961 */ 1962 vm_page_t 1963 vm_page_alloc_freelist(int flind, int req) 1964 { 1965 vm_page_t m; 1966 u_int flags, free_count; 1967 int req_class; 1968 1969 req_class = req & VM_ALLOC_CLASS_MASK; 1970 1971 /* 1972 * The page daemon is allowed to dig deeper into the free page list. 1973 */ 1974 if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) 1975 req_class = VM_ALLOC_SYSTEM; 1976 1977 /* 1978 * Do not allocate reserved pages unless the req has asked for it. 1979 */ 1980 again: 1981 mtx_lock(&vm_page_queue_free_mtx); 1982 if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || 1983 (req_class == VM_ALLOC_SYSTEM && 1984 vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || 1985 (req_class == VM_ALLOC_INTERRUPT && 1986 vm_cnt.v_free_count > 0)) { 1987 m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); 1988 } else { 1989 if (vm_page_alloc_fail(NULL, req)) 1990 goto again; 1991 return (NULL); 1992 } 1993 if (m == NULL) { 1994 mtx_unlock(&vm_page_queue_free_mtx); 1995 return (NULL); 1996 } 1997 free_count = vm_phys_freecnt_adj(m, -1); 1998 mtx_unlock(&vm_page_queue_free_mtx); 1999 vm_page_alloc_check(m); 2000 2001 /* 2002 * Initialize the page. Only the PG_ZERO flag is inherited. 2003 */ 2004 m->aflags = 0; 2005 flags = 0; 2006 if ((req & VM_ALLOC_ZERO) != 0) 2007 flags = PG_ZERO; 2008 m->flags &= flags; 2009 if ((req & VM_ALLOC_WIRED) != 0) { 2010 /* 2011 * The page lock is not required for wiring a page that does 2012 * not belong to an object. 2013 */ 2014 atomic_add_int(&vm_cnt.v_wire_count, 1); 2015 m->wire_count = 1; 2016 } 2017 /* Unmanaged pages don't use "act_count". */ 2018 m->oflags = VPO_UNMANAGED; 2019 if (vm_paging_needed(free_count)) 2020 pagedaemon_wakeup(); 2021 return (m); 2022 } 2023 2024 #define VPSC_ANY 0 /* No restrictions. */ 2025 #define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ 2026 #define VPSC_NOSUPER 2 /* Skip superpages. */ 2027 2028 /* 2029 * vm_page_scan_contig: 2030 * 2031 * Scan vm_page_array[] between the specified entries "m_start" and 2032 * "m_end" for a run of contiguous physical pages that satisfy the 2033 * specified conditions, and return the lowest page in the run. 
The 2034 * specified "alignment" determines the alignment of the lowest physical 2035 * page in the run. If the specified "boundary" is non-zero, then the 2036 * run of physical pages cannot span a physical address that is a 2037 * multiple of "boundary". 2038 * 2039 * "m_end" is never dereferenced, so it need not point to a vm_page 2040 * structure within vm_page_array[]. 2041 * 2042 * "npages" must be greater than zero. "m_start" and "m_end" must not 2043 * span a hole (or discontiguity) in the physical address space. Both 2044 * "alignment" and "boundary" must be a power of two. 2045 */ 2046 vm_page_t 2047 vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, 2048 u_long alignment, vm_paddr_t boundary, int options) 2049 { 2050 struct mtx *m_mtx; 2051 vm_object_t object; 2052 vm_paddr_t pa; 2053 vm_page_t m, m_run; 2054 #if VM_NRESERVLEVEL > 0 2055 int level; 2056 #endif 2057 int m_inc, order, run_ext, run_len; 2058 2059 KASSERT(npages > 0, ("npages is 0")); 2060 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 2061 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 2062 m_run = NULL; 2063 run_len = 0; 2064 m_mtx = NULL; 2065 for (m = m_start; m < m_end && run_len < npages; m += m_inc) { 2066 KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, 2067 ("page %p is PG_FICTITIOUS or PG_MARKER", m)); 2068 2069 /* 2070 * If the current page would be the start of a run, check its 2071 * physical address against the end, alignment, and boundary 2072 * conditions. If it doesn't satisfy these conditions, either 2073 * terminate the scan or advance to the next page that 2074 * satisfies the failed condition. 2075 */ 2076 if (run_len == 0) { 2077 KASSERT(m_run == NULL, ("m_run != NULL")); 2078 if (m + npages > m_end) 2079 break; 2080 pa = VM_PAGE_TO_PHYS(m); 2081 if ((pa & (alignment - 1)) != 0) { 2082 m_inc = atop(roundup2(pa, alignment) - pa); 2083 continue; 2084 } 2085 if (rounddown2(pa ^ (pa + ptoa(npages) - 1), 2086 boundary) != 0) { 2087 m_inc = atop(roundup2(pa, boundary) - pa); 2088 continue; 2089 } 2090 } else 2091 KASSERT(m_run != NULL, ("m_run == NULL")); 2092 2093 vm_page_change_lock(m, &m_mtx); 2094 m_inc = 1; 2095 retry: 2096 if (m->wire_count != 0 || m->hold_count != 0) 2097 run_ext = 0; 2098 #if VM_NRESERVLEVEL > 0 2099 else if ((level = vm_reserv_level(m)) >= 0 && 2100 (options & VPSC_NORESERV) != 0) { 2101 run_ext = 0; 2102 /* Advance to the end of the reservation. */ 2103 pa = VM_PAGE_TO_PHYS(m); 2104 m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - 2105 pa); 2106 } 2107 #endif 2108 else if ((object = m->object) != NULL) { 2109 /* 2110 * The page is considered eligible for relocation if 2111 * and only if it could be laundered or reclaimed by 2112 * the page daemon. 2113 */ 2114 if (!VM_OBJECT_TRYRLOCK(object)) { 2115 mtx_unlock(m_mtx); 2116 VM_OBJECT_RLOCK(object); 2117 mtx_lock(m_mtx); 2118 if (m->object != object) { 2119 /* 2120 * The page may have been freed. 2121 */ 2122 VM_OBJECT_RUNLOCK(object); 2123 goto retry; 2124 } else if (m->wire_count != 0 || 2125 m->hold_count != 0) { 2126 run_ext = 0; 2127 goto unlock; 2128 } 2129 } 2130 KASSERT((m->flags & PG_UNHOLDFREE) == 0, 2131 ("page %p is PG_UNHOLDFREE", m)); 2132 /* Don't care: PG_NODUMP, PG_ZERO. 
*/ 2133 if (object->type != OBJT_DEFAULT && 2134 object->type != OBJT_SWAP && 2135 object->type != OBJT_VNODE) { 2136 run_ext = 0; 2137 #if VM_NRESERVLEVEL > 0 2138 } else if ((options & VPSC_NOSUPER) != 0 && 2139 (level = vm_reserv_level_iffullpop(m)) >= 0) { 2140 run_ext = 0; 2141 /* Advance to the end of the superpage. */ 2142 pa = VM_PAGE_TO_PHYS(m); 2143 m_inc = atop(roundup2(pa + 1, 2144 vm_reserv_size(level)) - pa); 2145 #endif 2146 } else if (object->memattr == VM_MEMATTR_DEFAULT && 2147 m->queue != PQ_NONE && !vm_page_busied(m)) { 2148 /* 2149 * The page is allocated but eligible for 2150 * relocation. Extend the current run by one 2151 * page. 2152 */ 2153 KASSERT(pmap_page_get_memattr(m) == 2154 VM_MEMATTR_DEFAULT, 2155 ("page %p has an unexpected memattr", m)); 2156 KASSERT((m->oflags & (VPO_SWAPINPROG | 2157 VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, 2158 ("page %p has unexpected oflags", m)); 2159 /* Don't care: VPO_NOSYNC. */ 2160 run_ext = 1; 2161 } else 2162 run_ext = 0; 2163 unlock: 2164 VM_OBJECT_RUNLOCK(object); 2165 #if VM_NRESERVLEVEL > 0 2166 } else if (level >= 0) { 2167 /* 2168 * The page is reserved but not yet allocated. In 2169 * other words, it is still free. Extend the current 2170 * run by one page. 2171 */ 2172 run_ext = 1; 2173 #endif 2174 } else if ((order = m->order) < VM_NFREEORDER) { 2175 /* 2176 * The page is enqueued in the physical memory 2177 * allocator's free page queues. Moreover, it is the 2178 * first page in a power-of-two-sized run of 2179 * contiguous free pages. Add these pages to the end 2180 * of the current run, and jump ahead. 2181 */ 2182 run_ext = 1 << order; 2183 m_inc = 1 << order; 2184 } else { 2185 /* 2186 * Skip the page for one of the following reasons: (1) 2187 * It is enqueued in the physical memory allocator's 2188 * free page queues. However, it is not the first 2189 * page in a run of contiguous free pages. (This case 2190 * rarely occurs because the scan is performed in 2191 * ascending order.) (2) It is not reserved, and it is 2192 * transitioning from free to allocated. (Conversely, 2193 * the transition from allocated to free for managed 2194 * pages is blocked by the page lock.) (3) It is 2195 * allocated but not contained by an object and not 2196 * wired, e.g., allocated by Xen's balloon driver. 2197 */ 2198 run_ext = 0; 2199 } 2200 2201 /* 2202 * Extend or reset the current run of pages. 2203 */ 2204 if (run_ext > 0) { 2205 if (run_len == 0) 2206 m_run = m; 2207 run_len += run_ext; 2208 } else { 2209 if (run_len > 0) { 2210 m_run = NULL; 2211 run_len = 0; 2212 } 2213 } 2214 } 2215 if (m_mtx != NULL) 2216 mtx_unlock(m_mtx); 2217 if (run_len >= npages) 2218 return (m_run); 2219 return (NULL); 2220 } 2221 2222 /* 2223 * vm_page_reclaim_run: 2224 * 2225 * Try to relocate each of the allocated virtual pages within the 2226 * specified run of physical pages to a new physical address. Free the 2227 * physical pages underlying the relocated virtual pages. A virtual page 2228 * is relocatable if and only if it could be laundered or reclaimed by 2229 * the page daemon. Whenever possible, a virtual page is relocated to a 2230 * physical address above "high". 2231 * 2232 * Returns 0 if every physical page within the run was already free or 2233 * just freed by a successful relocation. Otherwise, returns a non-zero 2234 * value indicating why the last attempt to relocate a virtual page was 2235 * unsuccessful. 2236 * 2237 * "req_class" must be an allocation class. 
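 *
 * The non-zero values come from the checks below: for example, EBUSY for
 * a wired, held, or busied page, EINVAL for a page that cannot be
 * relocated (e.g., one belonging to an object type other than default,
 * swap, or vnode), and ENOMEM when no replacement page could be
 * allocated.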
2238 */ 2239 static int 2240 vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, 2241 vm_paddr_t high) 2242 { 2243 struct mtx *m_mtx; 2244 struct spglist free; 2245 vm_object_t object; 2246 vm_paddr_t pa; 2247 vm_page_t m, m_end, m_new; 2248 int error, order, req; 2249 2250 KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, 2251 ("req_class is not an allocation class")); 2252 SLIST_INIT(&free); 2253 error = 0; 2254 m = m_run; 2255 m_end = m_run + npages; 2256 m_mtx = NULL; 2257 for (; error == 0 && m < m_end; m++) { 2258 KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, 2259 ("page %p is PG_FICTITIOUS or PG_MARKER", m)); 2260 2261 /* 2262 * Avoid releasing and reacquiring the same page lock. 2263 */ 2264 vm_page_change_lock(m, &m_mtx); 2265 retry: 2266 if (m->wire_count != 0 || m->hold_count != 0) 2267 error = EBUSY; 2268 else if ((object = m->object) != NULL) { 2269 /* 2270 * The page is relocated if and only if it could be 2271 * laundered or reclaimed by the page daemon. 2272 */ 2273 if (!VM_OBJECT_TRYWLOCK(object)) { 2274 mtx_unlock(m_mtx); 2275 VM_OBJECT_WLOCK(object); 2276 mtx_lock(m_mtx); 2277 if (m->object != object) { 2278 /* 2279 * The page may have been freed. 2280 */ 2281 VM_OBJECT_WUNLOCK(object); 2282 goto retry; 2283 } else if (m->wire_count != 0 || 2284 m->hold_count != 0) { 2285 error = EBUSY; 2286 goto unlock; 2287 } 2288 } 2289 KASSERT((m->flags & PG_UNHOLDFREE) == 0, 2290 ("page %p is PG_UNHOLDFREE", m)); 2291 /* Don't care: PG_NODUMP, PG_ZERO. */ 2292 if (object->type != OBJT_DEFAULT && 2293 object->type != OBJT_SWAP && 2294 object->type != OBJT_VNODE) 2295 error = EINVAL; 2296 else if (object->memattr != VM_MEMATTR_DEFAULT) 2297 error = EINVAL; 2298 else if (m->queue != PQ_NONE && !vm_page_busied(m)) { 2299 KASSERT(pmap_page_get_memattr(m) == 2300 VM_MEMATTR_DEFAULT, 2301 ("page %p has an unexpected memattr", m)); 2302 KASSERT((m->oflags & (VPO_SWAPINPROG | 2303 VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, 2304 ("page %p has unexpected oflags", m)); 2305 /* Don't care: VPO_NOSYNC. */ 2306 if (m->valid != 0) { 2307 /* 2308 * First, try to allocate a new page 2309 * that is above "high". Failing 2310 * that, try to allocate a new page 2311 * that is below "m_run". Allocate 2312 * the new page between the end of 2313 * "m_run" and "high" only as a last 2314 * resort. 2315 */ 2316 req = req_class | VM_ALLOC_NOOBJ; 2317 if ((m->flags & PG_NODUMP) != 0) 2318 req |= VM_ALLOC_NODUMP; 2319 if (trunc_page(high) != 2320 ~(vm_paddr_t)PAGE_MASK) { 2321 m_new = vm_page_alloc_contig( 2322 NULL, 0, req, 1, 2323 round_page(high), 2324 ~(vm_paddr_t)0, 2325 PAGE_SIZE, 0, 2326 VM_MEMATTR_DEFAULT); 2327 } else 2328 m_new = NULL; 2329 if (m_new == NULL) { 2330 pa = VM_PAGE_TO_PHYS(m_run); 2331 m_new = vm_page_alloc_contig( 2332 NULL, 0, req, 1, 2333 0, pa - 1, PAGE_SIZE, 0, 2334 VM_MEMATTR_DEFAULT); 2335 } 2336 if (m_new == NULL) { 2337 pa += ptoa(npages); 2338 m_new = vm_page_alloc_contig( 2339 NULL, 0, req, 1, 2340 pa, high, PAGE_SIZE, 0, 2341 VM_MEMATTR_DEFAULT); 2342 } 2343 if (m_new == NULL) { 2344 error = ENOMEM; 2345 goto unlock; 2346 } 2347 KASSERT(m_new->wire_count == 0, 2348 ("page %p is wired", m)); 2349 2350 /* 2351 * Replace "m" with the new page. For 2352 * vm_page_replace(), "m" must be busy 2353 * and dequeued. Finally, change "m" 2354 * as if vm_page_free() was called. 
2355 */ 2356 if (object->ref_count != 0) 2357 pmap_remove_all(m); 2358 m_new->aflags = m->aflags; 2359 KASSERT(m_new->oflags == VPO_UNMANAGED, 2360 ("page %p is managed", m)); 2361 m_new->oflags = m->oflags & VPO_NOSYNC; 2362 pmap_copy_page(m, m_new); 2363 m_new->valid = m->valid; 2364 m_new->dirty = m->dirty; 2365 m->flags &= ~PG_ZERO; 2366 vm_page_xbusy(m); 2367 vm_page_remque(m); 2368 vm_page_replace_checked(m_new, object, 2369 m->pindex, m); 2370 m->valid = 0; 2371 vm_page_undirty(m); 2372 2373 /* 2374 * The new page must be deactivated 2375 * before the object is unlocked. 2376 */ 2377 vm_page_change_lock(m_new, &m_mtx); 2378 vm_page_deactivate(m_new); 2379 } else { 2380 m->flags &= ~PG_ZERO; 2381 vm_page_remque(m); 2382 vm_page_remove(m); 2383 KASSERT(m->dirty == 0, 2384 ("page %p is dirty", m)); 2385 } 2386 SLIST_INSERT_HEAD(&free, m, plinks.s.ss); 2387 } else 2388 error = EBUSY; 2389 unlock: 2390 VM_OBJECT_WUNLOCK(object); 2391 } else { 2392 mtx_lock(&vm_page_queue_free_mtx); 2393 order = m->order; 2394 if (order < VM_NFREEORDER) { 2395 /* 2396 * The page is enqueued in the physical memory 2397 * allocator's free page queues. Moreover, it 2398 * is the first page in a power-of-two-sized 2399 * run of contiguous free pages. Jump ahead 2400 * to the last page within that run, and 2401 * continue from there. 2402 */ 2403 m += (1 << order) - 1; 2404 } 2405 #if VM_NRESERVLEVEL > 0 2406 else if (vm_reserv_is_page_free(m)) 2407 order = 0; 2408 #endif 2409 mtx_unlock(&vm_page_queue_free_mtx); 2410 if (order == VM_NFREEORDER) 2411 error = EINVAL; 2412 } 2413 } 2414 if (m_mtx != NULL) 2415 mtx_unlock(m_mtx); 2416 if ((m = SLIST_FIRST(&free)) != NULL) { 2417 mtx_lock(&vm_page_queue_free_mtx); 2418 do { 2419 SLIST_REMOVE_HEAD(&free, plinks.s.ss); 2420 vm_page_free_phys(m); 2421 } while ((m = SLIST_FIRST(&free)) != NULL); 2422 vm_page_free_wakeup(); 2423 mtx_unlock(&vm_page_queue_free_mtx); 2424 } 2425 return (error); 2426 } 2427 2428 #define NRUNS 16 2429 2430 CTASSERT(powerof2(NRUNS)); 2431 2432 #define RUN_INDEX(count) ((count) & (NRUNS - 1)) 2433 2434 #define MIN_RECLAIM 8 2435 2436 /* 2437 * vm_page_reclaim_contig: 2438 * 2439 * Reclaim allocated, contiguous physical memory satisfying the specified 2440 * conditions by relocating the virtual pages using that physical memory. 2441 * Returns true if reclamation is successful and false otherwise. Since 2442 * relocation requires the allocation of physical pages, reclamation may 2443 * fail due to a shortage of free pages. When reclamation fails, callers 2444 * are expected to perform VM_WAIT before retrying a failed allocation 2445 * operation, e.g., vm_page_alloc_contig(). 2446 * 2447 * The caller must always specify an allocation class through "req". 2448 * 2449 * allocation classes: 2450 * VM_ALLOC_NORMAL normal process request 2451 * VM_ALLOC_SYSTEM system *really* needs a page 2452 * VM_ALLOC_INTERRUPT interrupt time request 2453 * 2454 * The optional allocation flags are ignored. 2455 * 2456 * "npages" must be greater than zero. Both "alignment" and "boundary" 2457 * must be a power of two. 
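 *
 * A typical caller pairs this routine with vm_page_alloc_contig() in a
 * retry loop along the following lines (an illustrative sketch only;
 * "npages", "low", "high", "alignment", and "boundary" stand for the
 * caller's constraints, not values taken from this file):
 *
 *	for (;;) {
 *		m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL |
 *		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, npages, low, high,
 *		    alignment, boundary, VM_MEMATTR_DEFAULT);
 *		if (m != NULL)
 *			break;
 *		if (!vm_page_reclaim_contig(VM_ALLOC_NORMAL, npages, low,
 *		    high, alignment, boundary))
 *			VM_WAIT;
 *	}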
2458 */ 2459 bool 2460 vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, 2461 u_long alignment, vm_paddr_t boundary) 2462 { 2463 vm_paddr_t curr_low; 2464 vm_page_t m_run, m_runs[NRUNS]; 2465 u_long count, reclaimed; 2466 int error, i, options, req_class; 2467 2468 KASSERT(npages > 0, ("npages is 0")); 2469 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 2470 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 2471 req_class = req & VM_ALLOC_CLASS_MASK; 2472 2473 /* 2474 * The page daemon is allowed to dig deeper into the free page list. 2475 */ 2476 if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) 2477 req_class = VM_ALLOC_SYSTEM; 2478 2479 /* 2480 * Return if the number of free pages cannot satisfy the requested 2481 * allocation. 2482 */ 2483 count = vm_cnt.v_free_count; 2484 if (count < npages + vm_cnt.v_free_reserved || (count < npages + 2485 vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || 2486 (count < npages && req_class == VM_ALLOC_INTERRUPT)) 2487 return (false); 2488 2489 /* 2490 * Scan up to three times, relaxing the restrictions ("options") on 2491 * the reclamation of reservations and superpages each time. 2492 */ 2493 for (options = VPSC_NORESERV;;) { 2494 /* 2495 * Find the highest runs that satisfy the given constraints 2496 * and restrictions, and record them in "m_runs". 2497 */ 2498 curr_low = low; 2499 count = 0; 2500 for (;;) { 2501 m_run = vm_phys_scan_contig(npages, curr_low, high, 2502 alignment, boundary, options); 2503 if (m_run == NULL) 2504 break; 2505 curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); 2506 m_runs[RUN_INDEX(count)] = m_run; 2507 count++; 2508 } 2509 2510 /* 2511 * Reclaim the highest runs in LIFO (descending) order until 2512 * the number of reclaimed pages, "reclaimed", is at least 2513 * MIN_RECLAIM. Reset "reclaimed" each time because each 2514 * reclamation is idempotent, and runs will (likely) recur 2515 * from one scan to the next as restrictions are relaxed. 2516 */ 2517 reclaimed = 0; 2518 for (i = 0; count > 0 && i < NRUNS; i++) { 2519 count--; 2520 m_run = m_runs[RUN_INDEX(count)]; 2521 error = vm_page_reclaim_run(req_class, npages, m_run, 2522 high); 2523 if (error == 0) { 2524 reclaimed += npages; 2525 if (reclaimed >= MIN_RECLAIM) 2526 return (true); 2527 } 2528 } 2529 2530 /* 2531 * Either relax the restrictions on the next scan or return if 2532 * the last scan had no restrictions. 2533 */ 2534 if (options == VPSC_NORESERV) 2535 options = VPSC_NOSUPER; 2536 else if (options == VPSC_NOSUPER) 2537 options = VPSC_ANY; 2538 else if (options == VPSC_ANY) 2539 return (reclaimed != 0); 2540 } 2541 } 2542 2543 /* 2544 * vm_wait: (also see VM_WAIT macro) 2545 * 2546 * Sleep until free pages are available for allocation. 2547 * - Called in various places before memory allocations. 
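 * - vm_wait() acquires the free page queue lock itself; _vm_wait() below
 *   expects that lock to be held on entry and drops it (PDROP) before
 *   sleeping.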
2548 */ 2549 static void 2550 _vm_wait(void) 2551 { 2552 2553 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 2554 if (curproc == pageproc) { 2555 vm_pageout_pages_needed = 1; 2556 msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, 2557 PDROP | PSWP, "VMWait", 0); 2558 } else { 2559 if (__predict_false(pageproc == NULL)) 2560 panic("vm_wait in early boot"); 2561 if (!vm_pageout_wanted) { 2562 vm_pageout_wanted = true; 2563 wakeup(&vm_pageout_wanted); 2564 } 2565 vm_pages_needed = true; 2566 msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM, 2567 "vmwait", 0); 2568 } 2569 } 2570 2571 void 2572 vm_wait(void) 2573 { 2574 2575 mtx_lock(&vm_page_queue_free_mtx); 2576 _vm_wait(); 2577 } 2578 2579 /* 2580 * vm_page_alloc_fail: 2581 * 2582 * Called when a page allocation function fails. Informs the 2583 * pagedaemon and performs the requested wait. Requires the 2584 * page_queue_free and object lock on entry. Returns with the 2585 * object lock held and free lock released. Returns an error when 2586 * retry is necessary. 2587 * 2588 */ 2589 static int 2590 vm_page_alloc_fail(vm_object_t object, int req) 2591 { 2592 2593 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 2594 2595 atomic_add_int(&vm_pageout_deficit, 2596 max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); 2597 pagedaemon_wakeup(); 2598 if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) { 2599 if (object != NULL) 2600 VM_OBJECT_WUNLOCK(object); 2601 _vm_wait(); 2602 if (object != NULL) 2603 VM_OBJECT_WLOCK(object); 2604 if (req & VM_ALLOC_WAITOK) 2605 return (EAGAIN); 2606 } else 2607 mtx_unlock(&vm_page_queue_free_mtx); 2608 return (0); 2609 } 2610 2611 /* 2612 * vm_waitpfault: (also see VM_WAITPFAULT macro) 2613 * 2614 * Sleep until free pages are available for allocation. 2615 * - Called only in vm_fault so that processes page faulting 2616 * can be easily tracked. 2617 * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing 2618 * processes will be able to grab memory first. Do not change 2619 * this balance without careful testing first. 2620 */ 2621 void 2622 vm_waitpfault(void) 2623 { 2624 2625 mtx_lock(&vm_page_queue_free_mtx); 2626 if (!vm_pageout_wanted) { 2627 vm_pageout_wanted = true; 2628 wakeup(&vm_pageout_wanted); 2629 } 2630 vm_pages_needed = true; 2631 msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER, 2632 "pfault", 0); 2633 } 2634 2635 struct vm_pagequeue * 2636 vm_page_pagequeue(vm_page_t m) 2637 { 2638 2639 if (vm_page_in_laundry(m)) 2640 return (&vm_dom[0].vmd_pagequeues[m->queue]); 2641 else 2642 return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); 2643 } 2644 2645 /* 2646 * vm_page_dequeue: 2647 * 2648 * Remove the given page from its current page queue. 2649 * 2650 * The page must be locked. 2651 */ 2652 void 2653 vm_page_dequeue(vm_page_t m) 2654 { 2655 struct vm_pagequeue *pq; 2656 2657 vm_page_assert_locked(m); 2658 KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", 2659 m)); 2660 pq = vm_page_pagequeue(m); 2661 vm_pagequeue_lock(pq); 2662 m->queue = PQ_NONE; 2663 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 2664 vm_pagequeue_cnt_dec(pq); 2665 vm_pagequeue_unlock(pq); 2666 } 2667 2668 /* 2669 * vm_page_dequeue_locked: 2670 * 2671 * Remove the given page from its current page queue. 2672 * 2673 * The page and page queue must be locked. 
2674 */ 2675 void 2676 vm_page_dequeue_locked(vm_page_t m) 2677 { 2678 struct vm_pagequeue *pq; 2679 2680 vm_page_lock_assert(m, MA_OWNED); 2681 pq = vm_page_pagequeue(m); 2682 vm_pagequeue_assert_locked(pq); 2683 m->queue = PQ_NONE; 2684 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 2685 vm_pagequeue_cnt_dec(pq); 2686 } 2687 2688 /* 2689 * vm_page_enqueue: 2690 * 2691 * Add the given page to the specified page queue. 2692 * 2693 * The page must be locked. 2694 */ 2695 static void 2696 vm_page_enqueue(uint8_t queue, vm_page_t m) 2697 { 2698 struct vm_pagequeue *pq; 2699 2700 vm_page_lock_assert(m, MA_OWNED); 2701 KASSERT(queue < PQ_COUNT, 2702 ("vm_page_enqueue: invalid queue %u request for page %p", 2703 queue, m)); 2704 if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE) 2705 pq = &vm_dom[0].vmd_pagequeues[queue]; 2706 else 2707 pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; 2708 vm_pagequeue_lock(pq); 2709 m->queue = queue; 2710 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 2711 vm_pagequeue_cnt_inc(pq); 2712 vm_pagequeue_unlock(pq); 2713 } 2714 2715 /* 2716 * vm_page_requeue: 2717 * 2718 * Move the given page to the tail of its current page queue. 2719 * 2720 * The page must be locked. 2721 */ 2722 void 2723 vm_page_requeue(vm_page_t m) 2724 { 2725 struct vm_pagequeue *pq; 2726 2727 vm_page_lock_assert(m, MA_OWNED); 2728 KASSERT(m->queue != PQ_NONE, 2729 ("vm_page_requeue: page %p is not queued", m)); 2730 pq = vm_page_pagequeue(m); 2731 vm_pagequeue_lock(pq); 2732 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 2733 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 2734 vm_pagequeue_unlock(pq); 2735 } 2736 2737 /* 2738 * vm_page_requeue_locked: 2739 * 2740 * Move the given page to the tail of its current page queue. 2741 * 2742 * The page queue must be locked. 2743 */ 2744 void 2745 vm_page_requeue_locked(vm_page_t m) 2746 { 2747 struct vm_pagequeue *pq; 2748 2749 KASSERT(m->queue != PQ_NONE, 2750 ("vm_page_requeue_locked: page %p is not queued", m)); 2751 pq = vm_page_pagequeue(m); 2752 vm_pagequeue_assert_locked(pq); 2753 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 2754 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 2755 } 2756 2757 /* 2758 * vm_page_activate: 2759 * 2760 * Put the specified page on the active list (if appropriate). 2761 * Ensure that act_count is at least ACT_INIT but do not otherwise 2762 * mess with it. 2763 * 2764 * The page must be locked. 2765 */ 2766 void 2767 vm_page_activate(vm_page_t m) 2768 { 2769 int queue; 2770 2771 vm_page_lock_assert(m, MA_OWNED); 2772 if ((queue = m->queue) != PQ_ACTIVE) { 2773 if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { 2774 if (m->act_count < ACT_INIT) 2775 m->act_count = ACT_INIT; 2776 if (queue != PQ_NONE) 2777 vm_page_dequeue(m); 2778 vm_page_enqueue(PQ_ACTIVE, m); 2779 } else 2780 KASSERT(queue == PQ_NONE, 2781 ("vm_page_activate: wired page %p is queued", m)); 2782 } else { 2783 if (m->act_count < ACT_INIT) 2784 m->act_count = ACT_INIT; 2785 } 2786 } 2787 2788 /* 2789 * vm_page_free_wakeup: 2790 * 2791 * Helper routine for vm_page_free_toq(). This routine is called 2792 * when a page is added to the free queues. 2793 * 2794 * The page queues must be locked. 2795 */ 2796 static void 2797 vm_page_free_wakeup(void) 2798 { 2799 2800 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 2801 /* 2802 * if pageout daemon needs pages, then tell it that there are 2803 * some free. 
2804 */ 2805 if (vm_pageout_pages_needed && 2806 vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { 2807 wakeup(&vm_pageout_pages_needed); 2808 vm_pageout_pages_needed = 0; 2809 } 2810 /* 2811 * wakeup processes that are waiting on memory if we hit a 2812 * high water mark. And wakeup scheduler process if we have 2813 * lots of memory. this process will swapin processes. 2814 */ 2815 if (vm_pages_needed && !vm_page_count_min()) { 2816 vm_pages_needed = false; 2817 wakeup(&vm_cnt.v_free_count); 2818 } 2819 } 2820 2821 /* 2822 * vm_page_free_prep: 2823 * 2824 * Prepares the given page to be put on the free list, 2825 * disassociating it from any VM object. The caller may return 2826 * the page to the free list only if this function returns true. 2827 * 2828 * The object must be locked. The page must be locked if it is 2829 * managed. For a queued managed page, the pagequeue_locked 2830 * argument specifies whether the page queue is already locked. 2831 */ 2832 bool 2833 vm_page_free_prep(vm_page_t m, bool pagequeue_locked) 2834 { 2835 2836 #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) 2837 if ((m->flags & PG_ZERO) != 0) { 2838 uint64_t *p; 2839 int i; 2840 p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2841 for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++) 2842 KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx", 2843 m, i, (uintmax_t)*p)); 2844 } 2845 #endif 2846 if ((m->oflags & VPO_UNMANAGED) == 0) { 2847 vm_page_lock_assert(m, MA_OWNED); 2848 KASSERT(!pmap_page_is_mapped(m), 2849 ("vm_page_free_toq: freeing mapped page %p", m)); 2850 } else 2851 KASSERT(m->queue == PQ_NONE, 2852 ("vm_page_free_toq: unmanaged page %p is queued", m)); 2853 VM_CNT_INC(v_tfree); 2854 2855 if (vm_page_sbusied(m)) 2856 panic("vm_page_free: freeing busy page %p", m); 2857 2858 vm_page_remove(m); 2859 2860 /* 2861 * If fictitious remove object association and 2862 * return. 2863 */ 2864 if ((m->flags & PG_FICTITIOUS) != 0) { 2865 KASSERT(m->wire_count == 1, 2866 ("fictitious page %p is not wired", m)); 2867 KASSERT(m->queue == PQ_NONE, 2868 ("fictitious page %p is queued", m)); 2869 return (false); 2870 } 2871 2872 if (m->queue != PQ_NONE) { 2873 if (pagequeue_locked) 2874 vm_page_dequeue_locked(m); 2875 else 2876 vm_page_dequeue(m); 2877 } 2878 m->valid = 0; 2879 vm_page_undirty(m); 2880 2881 if (m->wire_count != 0) 2882 panic("vm_page_free: freeing wired page %p", m); 2883 if (m->hold_count != 0) { 2884 m->flags &= ~PG_ZERO; 2885 KASSERT((m->flags & PG_UNHOLDFREE) == 0, 2886 ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); 2887 m->flags |= PG_UNHOLDFREE; 2888 return (false); 2889 } 2890 2891 /* 2892 * Restore the default memory attribute to the page. 2893 */ 2894 if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) 2895 pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); 2896 2897 return (true); 2898 } 2899 2900 /* 2901 * Insert the page into the physical memory allocator's free page 2902 * queues. This is the last step to free a page. 
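 * The free page queue lock must be held. Callers such as
 * vm_page_free_toq() and vm_page_free_phys_pglist() acquire the lock,
 * make one or more calls, and then call vm_page_free_wakeup() before
 * releasing it.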
2903 */ 2904 static void 2905 vm_page_free_phys(vm_page_t m) 2906 { 2907 2908 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 2909 2910 vm_phys_freecnt_adj(m, 1); 2911 #if VM_NRESERVLEVEL > 0 2912 if (!vm_reserv_free_page(m)) 2913 #endif 2914 vm_phys_free_pages(m, 0); 2915 } 2916 2917 void 2918 vm_page_free_phys_pglist(struct pglist *tq) 2919 { 2920 vm_page_t m; 2921 2922 if (TAILQ_EMPTY(tq)) 2923 return; 2924 mtx_lock(&vm_page_queue_free_mtx); 2925 TAILQ_FOREACH(m, tq, listq) 2926 vm_page_free_phys(m); 2927 vm_page_free_wakeup(); 2928 mtx_unlock(&vm_page_queue_free_mtx); 2929 } 2930 2931 /* 2932 * vm_page_free_toq: 2933 * 2934 * Returns the given page to the free list, disassociating it 2935 * from any VM object. 2936 * 2937 * The object must be locked. The page must be locked if it is 2938 * managed. 2939 */ 2940 void 2941 vm_page_free_toq(vm_page_t m) 2942 { 2943 2944 if (!vm_page_free_prep(m, false)) 2945 return; 2946 mtx_lock(&vm_page_queue_free_mtx); 2947 vm_page_free_phys(m); 2948 vm_page_free_wakeup(); 2949 mtx_unlock(&vm_page_queue_free_mtx); 2950 } 2951 2952 /* 2953 * vm_page_wire: 2954 * 2955 * Mark this page as wired down by yet 2956 * another map, removing it from paging queues 2957 * as necessary. 2958 * 2959 * If the page is fictitious, then its wire count must remain one. 2960 * 2961 * The page must be locked. 2962 */ 2963 void 2964 vm_page_wire(vm_page_t m) 2965 { 2966 2967 /* 2968 * Only bump the wire statistics if the page is not already wired, 2969 * and only unqueue the page if it is on some queue (if it is unmanaged 2970 * it is already off the queues). 2971 */ 2972 vm_page_lock_assert(m, MA_OWNED); 2973 if ((m->flags & PG_FICTITIOUS) != 0) { 2974 KASSERT(m->wire_count == 1, 2975 ("vm_page_wire: fictitious page %p's wire count isn't one", 2976 m)); 2977 return; 2978 } 2979 if (m->wire_count == 0) { 2980 KASSERT((m->oflags & VPO_UNMANAGED) == 0 || 2981 m->queue == PQ_NONE, 2982 ("vm_page_wire: unmanaged page %p is queued", m)); 2983 vm_page_remque(m); 2984 atomic_add_int(&vm_cnt.v_wire_count, 1); 2985 } 2986 m->wire_count++; 2987 KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); 2988 } 2989 2990 /* 2991 * vm_page_unwire: 2992 * 2993 * Release one wiring of the specified page, potentially allowing it to be 2994 * paged out. Returns TRUE if the number of wirings transitions to zero and 2995 * FALSE otherwise. 2996 * 2997 * Only managed pages belonging to an object can be paged out. If the number 2998 * of wirings transitions to zero and the page is eligible for page out, then 2999 * the page is added to the specified paging queue (unless PQ_NONE is 3000 * specified). 3001 * 3002 * If a page is fictitious, then its wire count must always be one. 3003 * 3004 * A managed page must be locked. 
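 *
 * For a managed page, a typical call site looks like the following (an
 * illustrative sketch only):
 *
 *	vm_page_lock(m);
 *	vm_page_unwire(m, PQ_INACTIVE);
 *	vm_page_unlock(m);
 *
 * where a TRUE return from vm_page_unwire() would indicate that the last
 * wiring was released and that an eligible page was placed on the
 * inactive queue.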
3005 */ 3006 boolean_t 3007 vm_page_unwire(vm_page_t m, uint8_t queue) 3008 { 3009 3010 KASSERT(queue < PQ_COUNT || queue == PQ_NONE, 3011 ("vm_page_unwire: invalid queue %u request for page %p", 3012 queue, m)); 3013 if ((m->oflags & VPO_UNMANAGED) == 0) 3014 vm_page_assert_locked(m); 3015 if ((m->flags & PG_FICTITIOUS) != 0) { 3016 KASSERT(m->wire_count == 1, 3017 ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); 3018 return (FALSE); 3019 } 3020 if (m->wire_count > 0) { 3021 m->wire_count--; 3022 if (m->wire_count == 0) { 3023 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 3024 if ((m->oflags & VPO_UNMANAGED) == 0 && 3025 m->object != NULL && queue != PQ_NONE) 3026 vm_page_enqueue(queue, m); 3027 return (TRUE); 3028 } else 3029 return (FALSE); 3030 } else 3031 panic("vm_page_unwire: page %p's wire count is zero", m); 3032 } 3033 3034 /* 3035 * Move the specified page to the inactive queue. 3036 * 3037 * Normally, "noreuse" is FALSE, resulting in LRU ordering of the inactive 3038 * queue. However, setting "noreuse" to TRUE will accelerate the specified 3039 * page's reclamation, but it will not unmap the page from any address space. 3040 * This is implemented by inserting the page near the head of the inactive 3041 * queue, using a marker page to guide FIFO insertion ordering. 3042 * 3043 * The page must be locked. 3044 */ 3045 static inline void 3046 _vm_page_deactivate(vm_page_t m, boolean_t noreuse) 3047 { 3048 struct vm_pagequeue *pq; 3049 int queue; 3050 3051 vm_page_assert_locked(m); 3052 3053 /* 3054 * Ignore if the page is already inactive, unless it is unlikely to be 3055 * reactivated. 3056 */ 3057 if ((queue = m->queue) == PQ_INACTIVE && !noreuse) 3058 return; 3059 if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { 3060 pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; 3061 /* Avoid multiple acquisitions of the inactive queue lock. */ 3062 if (queue == PQ_INACTIVE) { 3063 vm_pagequeue_lock(pq); 3064 vm_page_dequeue_locked(m); 3065 } else { 3066 if (queue != PQ_NONE) 3067 vm_page_dequeue(m); 3068 vm_pagequeue_lock(pq); 3069 } 3070 m->queue = PQ_INACTIVE; 3071 if (noreuse) 3072 TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead, 3073 m, plinks.q); 3074 else 3075 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 3076 vm_pagequeue_cnt_inc(pq); 3077 vm_pagequeue_unlock(pq); 3078 } 3079 } 3080 3081 /* 3082 * Move the specified page to the inactive queue. 3083 * 3084 * The page must be locked. 3085 */ 3086 void 3087 vm_page_deactivate(vm_page_t m) 3088 { 3089 3090 _vm_page_deactivate(m, FALSE); 3091 } 3092 3093 /* 3094 * Move the specified page to the inactive queue with the expectation 3095 * that it is unlikely to be reused. 3096 * 3097 * The page must be locked. 3098 */ 3099 void 3100 vm_page_deactivate_noreuse(vm_page_t m) 3101 { 3102 3103 _vm_page_deactivate(m, TRUE); 3104 } 3105 3106 /* 3107 * vm_page_launder 3108 * 3109 * Put a page in the laundry. 3110 */ 3111 void 3112 vm_page_launder(vm_page_t m) 3113 { 3114 int queue; 3115 3116 vm_page_assert_locked(m); 3117 if ((queue = m->queue) != PQ_LAUNDRY) { 3118 if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { 3119 if (queue != PQ_NONE) 3120 vm_page_dequeue(m); 3121 vm_page_enqueue(PQ_LAUNDRY, m); 3122 } else 3123 KASSERT(queue == PQ_NONE, 3124 ("wired page %p is queued", m)); 3125 } 3126 } 3127 3128 /* 3129 * vm_page_unswappable 3130 * 3131 * Put a page in the PQ_UNSWAPPABLE holding queue. 
3132 */ 3133 void 3134 vm_page_unswappable(vm_page_t m) 3135 { 3136 3137 vm_page_assert_locked(m); 3138 KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, 3139 ("page %p already unswappable", m)); 3140 if (m->queue != PQ_NONE) 3141 vm_page_dequeue(m); 3142 vm_page_enqueue(PQ_UNSWAPPABLE, m); 3143 } 3144 3145 /* 3146 * Attempt to free the page. If it cannot be freed, do nothing. Returns true 3147 * if the page is freed and false otherwise. 3148 * 3149 * The page must be managed. The page and its containing object must be 3150 * locked. 3151 */ 3152 bool 3153 vm_page_try_to_free(vm_page_t m) 3154 { 3155 3156 vm_page_assert_locked(m); 3157 VM_OBJECT_ASSERT_WLOCKED(m->object); 3158 KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); 3159 if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 || 3160 vm_page_busied(m)) 3161 return (false); 3162 if (m->object->ref_count != 0) { 3163 pmap_remove_all(m); 3164 if (m->dirty != 0) 3165 return (false); 3166 } 3167 vm_page_free(m); 3168 return (true); 3169 } 3170 3171 /* 3172 * vm_page_advise 3173 * 3174 * Apply the specified advice to the given page. 3175 * 3176 * The object and page must be locked. 3177 */ 3178 void 3179 vm_page_advise(vm_page_t m, int advice) 3180 { 3181 3182 vm_page_assert_locked(m); 3183 VM_OBJECT_ASSERT_WLOCKED(m->object); 3184 if (advice == MADV_FREE) 3185 /* 3186 * Mark the page clean. This will allow the page to be freed 3187 * without first paging it out. MADV_FREE pages are often 3188 * quickly reused by malloc(3), so we do not do anything that 3189 * would result in a page fault on a later access. 3190 */ 3191 vm_page_undirty(m); 3192 else if (advice != MADV_DONTNEED) { 3193 if (advice == MADV_WILLNEED) 3194 vm_page_activate(m); 3195 return; 3196 } 3197 3198 /* 3199 * Clear any references to the page. Otherwise, the page daemon will 3200 * immediately reactivate the page. 3201 */ 3202 vm_page_aflag_clear(m, PGA_REFERENCED); 3203 3204 if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) 3205 vm_page_dirty(m); 3206 3207 /* 3208 * Place clean pages near the head of the inactive queue rather than 3209 * the tail, thus defeating the queue's LRU operation and ensuring that 3210 * the page will be reused quickly. Dirty pages not already in the 3211 * laundry are moved there. 3212 */ 3213 if (m->dirty == 0) 3214 vm_page_deactivate_noreuse(m); 3215 else 3216 vm_page_launder(m); 3217 } 3218 3219 /* 3220 * Grab a page, waiting until we are waken up due to the page 3221 * changing state. We keep on waiting, if the page continues 3222 * to be in the object. If the page doesn't exist, first allocate it 3223 * and then conditionally zero it. 3224 * 3225 * This routine may sleep. 3226 * 3227 * The object must be locked on entry. The lock will, however, be released 3228 * and reacquired if the routine sleeps. 3229 */ 3230 vm_page_t 3231 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) 3232 { 3233 vm_page_t m; 3234 int sleep; 3235 int pflags; 3236 3237 VM_OBJECT_ASSERT_WLOCKED(object); 3238 KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || 3239 (allocflags & VM_ALLOC_IGN_SBUSY) != 0, 3240 ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); 3241 pflags = allocflags & 3242 ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); 3243 if ((allocflags & VM_ALLOC_NOWAIT) == 0) 3244 pflags |= VM_ALLOC_WAITFAIL; 3245 retrylookup: 3246 if ((m = vm_page_lookup(object, pindex)) != NULL) { 3247 sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? 
vm_page_xbusied(m) : vm_page_busied(m); 3249 if (sleep) { 3250 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 3251 return (NULL); 3252 /* 3253 * Reference the page before unlocking and 3254 * sleeping so that the page daemon is less 3255 * likely to reclaim it. 3256 */ 3257 vm_page_aflag_set(m, PGA_REFERENCED); 3258 vm_page_lock(m); 3259 VM_OBJECT_WUNLOCK(object); 3260 vm_page_busy_sleep(m, "pgrbwt", (allocflags & 3261 VM_ALLOC_IGN_SBUSY) != 0); 3262 VM_OBJECT_WLOCK(object); 3263 goto retrylookup; 3264 } else { 3265 if ((allocflags & VM_ALLOC_WIRED) != 0) { 3266 vm_page_lock(m); 3267 vm_page_wire(m); 3268 vm_page_unlock(m); 3269 } 3270 if ((allocflags & 3271 (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) 3272 vm_page_xbusy(m); 3273 if ((allocflags & VM_ALLOC_SBUSY) != 0) 3274 vm_page_sbusy(m); 3275 return (m); 3276 } 3277 } 3278 m = vm_page_alloc(object, pindex, pflags); 3279 if (m == NULL) { 3280 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 3281 return (NULL); 3282 goto retrylookup; 3283 } 3284 if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) 3285 pmap_zero_page(m); 3286 return (m); 3287 } 3288 3289 /* 3290 * Return the specified range of pages from the given object. For each 3291 * page offset within the range, if a page already exists within the object 3292 * at that offset and it is busy, then wait for it to change state. If, 3293 * instead, the page doesn't exist, then allocate it. 3294 * 3295 * The caller must always specify an allocation class. 3296 * 3297 * allocation classes: 3298 * VM_ALLOC_NORMAL normal process request 3299 * VM_ALLOC_SYSTEM system *really* needs the pages 3300 * 3301 * The caller must always specify that the pages are to be busied and/or 3302 * wired. 3303 * 3304 * optional allocation flags: 3305 * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages 3306 * VM_ALLOC_NOBUSY do not exclusive busy the page 3307 * VM_ALLOC_NOWAIT do not sleep 3308 * VM_ALLOC_SBUSY set page to sbusy state 3309 * VM_ALLOC_WIRED wire the pages 3310 * VM_ALLOC_ZERO zero and validate any invalid pages 3311 * 3312 * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it 3313 * may return a partial prefix of the requested range. 3314 */ 3315 int 3316 vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, 3317 vm_page_t *ma, int count) 3318 { 3319 vm_page_t m, mpred; 3320 int pflags; 3321 int i; 3322 bool sleep; 3323 3324 VM_OBJECT_ASSERT_WLOCKED(object); 3325 KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, 3326 ("vm_page_grab_pages: VM_ALLOC_COUNT() is not allowed")); 3327 KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || 3328 (allocflags & VM_ALLOC_WIRED) != 0, 3329 ("vm_page_grab_pages: the pages must be busied or wired")); 3330 KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || 3331 (allocflags & VM_ALLOC_IGN_SBUSY) != 0, 3332 ("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch")); 3333 if (count == 0) 3334 return (0); 3335 pflags = allocflags & ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | 3336 VM_ALLOC_WAITFAIL | VM_ALLOC_IGN_SBUSY); 3337 if ((allocflags & VM_ALLOC_NOWAIT) == 0) 3338 pflags |= VM_ALLOC_WAITFAIL; 3339 i = 0; 3340 retrylookup: 3341 m = vm_radix_lookup_le(&object->rtree, pindex + i); 3342 if (m == NULL || m->pindex != pindex + i) { 3343 mpred = m; 3344 m = NULL; 3345 } else 3346 mpred = TAILQ_PREV(m, pglist, listq); 3347 for (; i < count; i++) { 3348 if (m != NULL) { 3349 sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
3350 vm_page_xbusied(m) : vm_page_busied(m); 3351 if (sleep) { 3352 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 3353 break; 3354 /* 3355 * Reference the page before unlocking and 3356 * sleeping so that the page daemon is less 3357 * likely to reclaim it. 3358 */ 3359 vm_page_aflag_set(m, PGA_REFERENCED); 3360 vm_page_lock(m); 3361 VM_OBJECT_WUNLOCK(object); 3362 vm_page_busy_sleep(m, "grbmaw", (allocflags & 3363 VM_ALLOC_IGN_SBUSY) != 0); 3364 VM_OBJECT_WLOCK(object); 3365 goto retrylookup; 3366 } 3367 if ((allocflags & VM_ALLOC_WIRED) != 0) { 3368 vm_page_lock(m); 3369 vm_page_wire(m); 3370 vm_page_unlock(m); 3371 } 3372 if ((allocflags & (VM_ALLOC_NOBUSY | 3373 VM_ALLOC_SBUSY)) == 0) 3374 vm_page_xbusy(m); 3375 if ((allocflags & VM_ALLOC_SBUSY) != 0) 3376 vm_page_sbusy(m); 3377 } else { 3378 m = vm_page_alloc_after(object, pindex + i, 3379 pflags | VM_ALLOC_COUNT(count - i), mpred); 3380 if (m == NULL) { 3381 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 3382 break; 3383 goto retrylookup; 3384 } 3385 } 3386 if (m->valid == 0 && (allocflags & VM_ALLOC_ZERO) != 0) { 3387 if ((m->flags & PG_ZERO) == 0) 3388 pmap_zero_page(m); 3389 m->valid = VM_PAGE_BITS_ALL; 3390 } 3391 ma[i] = mpred = m; 3392 m = vm_page_next(m); 3393 } 3394 return (i); 3395 } 3396 3397 /* 3398 * Mapping function for valid or dirty bits in a page. 3399 * 3400 * Inputs are required to range within a page. 3401 */ 3402 vm_page_bits_t 3403 vm_page_bits(int base, int size) 3404 { 3405 int first_bit; 3406 int last_bit; 3407 3408 KASSERT( 3409 base + size <= PAGE_SIZE, 3410 ("vm_page_bits: illegal base/size %d/%d", base, size) 3411 ); 3412 3413 if (size == 0) /* handle degenerate case */ 3414 return (0); 3415 3416 first_bit = base >> DEV_BSHIFT; 3417 last_bit = (base + size - 1) >> DEV_BSHIFT; 3418 3419 return (((vm_page_bits_t)2 << last_bit) - 3420 ((vm_page_bits_t)1 << first_bit)); 3421 } 3422 3423 /* 3424 * vm_page_set_valid_range: 3425 * 3426 * Sets portions of a page valid. The arguments are expected 3427 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 3428 * of any partial chunks touched by the range. The invalid portion of 3429 * such chunks will be zeroed. 3430 * 3431 * (base + size) must be less then or equal to PAGE_SIZE. 3432 */ 3433 void 3434 vm_page_set_valid_range(vm_page_t m, int base, int size) 3435 { 3436 int endoff, frag; 3437 3438 VM_OBJECT_ASSERT_WLOCKED(m->object); 3439 if (size == 0) /* handle degenerate case */ 3440 return; 3441 3442 /* 3443 * If the base is not DEV_BSIZE aligned and the valid 3444 * bit is clear, we have to zero out a portion of the 3445 * first block. 3446 */ 3447 if ((frag = rounddown2(base, DEV_BSIZE)) != base && 3448 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) 3449 pmap_zero_page_area(m, frag, base - frag); 3450 3451 /* 3452 * If the ending offset is not DEV_BSIZE aligned and the 3453 * valid bit is clear, we have to zero out a portion of 3454 * the last block. 3455 */ 3456 endoff = base + size; 3457 if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && 3458 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) 3459 pmap_zero_page_area(m, endoff, 3460 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 3461 3462 /* 3463 * Assert that no previously invalid block that is now being validated 3464 * is already dirty. 3465 */ 3466 KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, 3467 ("vm_page_set_valid_range: page %p is dirty", m)); 3468 3469 /* 3470 * Set valid bits inclusive of any overlap. 
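 * For example, with DEV_BSIZE == 512 (illustrative figures), a call with
 * base == 0 and size == 1024 computes vm_page_bits(0, 1024) == 0x3 and
 * so marks the page's first two DEV_BSIZE chunks valid.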
3471 */ 3472 m->valid |= vm_page_bits(base, size); 3473 } 3474 3475 /* 3476 * Clear the given bits from the specified page's dirty field. 3477 */ 3478 static __inline void 3479 vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) 3480 { 3481 uintptr_t addr; 3482 #if PAGE_SIZE < 16384 3483 int shift; 3484 #endif 3485 3486 /* 3487 * If the object is locked and the page is neither exclusive busy nor 3488 * write mapped, then the page's dirty field cannot possibly be 3489 * set by a concurrent pmap operation. 3490 */ 3491 VM_OBJECT_ASSERT_WLOCKED(m->object); 3492 if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) 3493 m->dirty &= ~pagebits; 3494 else { 3495 /* 3496 * The pmap layer can call vm_page_dirty() without 3497 * holding a distinguished lock. The combination of 3498 * the object's lock and an atomic operation suffice 3499 * to guarantee consistency of the page dirty field. 3500 * 3501 * For PAGE_SIZE == 32768 case, compiler already 3502 * properly aligns the dirty field, so no forcible 3503 * alignment is needed. Only require existence of 3504 * atomic_clear_64 when page size is 32768. 3505 */ 3506 addr = (uintptr_t)&m->dirty; 3507 #if PAGE_SIZE == 32768 3508 atomic_clear_64((uint64_t *)addr, pagebits); 3509 #elif PAGE_SIZE == 16384 3510 atomic_clear_32((uint32_t *)addr, pagebits); 3511 #else /* PAGE_SIZE <= 8192 */ 3512 /* 3513 * Use a trick to perform a 32-bit atomic on the 3514 * containing aligned word, to not depend on the existence 3515 * of atomic_clear_{8, 16}. 3516 */ 3517 shift = addr & (sizeof(uint32_t) - 1); 3518 #if BYTE_ORDER == BIG_ENDIAN 3519 shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; 3520 #else 3521 shift *= NBBY; 3522 #endif 3523 addr &= ~(sizeof(uint32_t) - 1); 3524 atomic_clear_32((uint32_t *)addr, pagebits << shift); 3525 #endif /* PAGE_SIZE */ 3526 } 3527 } 3528 3529 /* 3530 * vm_page_set_validclean: 3531 * 3532 * Sets portions of a page valid and clean. The arguments are expected 3533 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 3534 * of any partial chunks touched by the range. The invalid portion of 3535 * such chunks will be zero'd. 3536 * 3537 * (base + size) must be less then or equal to PAGE_SIZE. 3538 */ 3539 void 3540 vm_page_set_validclean(vm_page_t m, int base, int size) 3541 { 3542 vm_page_bits_t oldvalid, pagebits; 3543 int endoff, frag; 3544 3545 VM_OBJECT_ASSERT_WLOCKED(m->object); 3546 if (size == 0) /* handle degenerate case */ 3547 return; 3548 3549 /* 3550 * If the base is not DEV_BSIZE aligned and the valid 3551 * bit is clear, we have to zero out a portion of the 3552 * first block. 3553 */ 3554 if ((frag = rounddown2(base, DEV_BSIZE)) != base && 3555 (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) 3556 pmap_zero_page_area(m, frag, base - frag); 3557 3558 /* 3559 * If the ending offset is not DEV_BSIZE aligned and the 3560 * valid bit is clear, we have to zero out a portion of 3561 * the last block. 3562 */ 3563 endoff = base + size; 3564 if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && 3565 (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) 3566 pmap_zero_page_area(m, endoff, 3567 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 3568 3569 /* 3570 * Set valid, clear dirty bits. If validating the entire 3571 * page we can safely clear the pmap modify bit. We also 3572 * use this opportunity to clear the VPO_NOSYNC flag. If a process 3573 * takes a write fault on a MAP_NOSYNC memory area the flag will 3574 * be set again. 
3575 * 3576 * We set valid bits inclusive of any overlap, but we can only 3577 * clear dirty bits for DEV_BSIZE chunks that are fully within 3578 * the range. 3579 */ 3580 oldvalid = m->valid; 3581 pagebits = vm_page_bits(base, size); 3582 m->valid |= pagebits; 3583 #if 0 /* NOT YET */ 3584 if ((frag = base & (DEV_BSIZE - 1)) != 0) { 3585 frag = DEV_BSIZE - frag; 3586 base += frag; 3587 size -= frag; 3588 if (size < 0) 3589 size = 0; 3590 } 3591 pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); 3592 #endif 3593 if (base == 0 && size == PAGE_SIZE) { 3594 /* 3595 * The page can only be modified within the pmap if it is 3596 * mapped, and it can only be mapped if it was previously 3597 * fully valid. 3598 */ 3599 if (oldvalid == VM_PAGE_BITS_ALL) 3600 /* 3601 * Perform the pmap_clear_modify() first. Otherwise, 3602 * a concurrent pmap operation, such as 3603 * pmap_protect(), could clear a modification in the 3604 * pmap and set the dirty field on the page before 3605 * pmap_clear_modify() had begun and after the dirty 3606 * field was cleared here. 3607 */ 3608 pmap_clear_modify(m); 3609 m->dirty = 0; 3610 m->oflags &= ~VPO_NOSYNC; 3611 } else if (oldvalid != VM_PAGE_BITS_ALL) 3612 m->dirty &= ~pagebits; 3613 else 3614 vm_page_clear_dirty_mask(m, pagebits); 3615 } 3616 3617 void 3618 vm_page_clear_dirty(vm_page_t m, int base, int size) 3619 { 3620 3621 vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); 3622 } 3623 3624 /* 3625 * vm_page_set_invalid: 3626 * 3627 * Invalidates DEV_BSIZE'd chunks within a page. Both the 3628 * valid and dirty bits for the effected areas are cleared. 3629 */ 3630 void 3631 vm_page_set_invalid(vm_page_t m, int base, int size) 3632 { 3633 vm_page_bits_t bits; 3634 vm_object_t object; 3635 3636 object = m->object; 3637 VM_OBJECT_ASSERT_WLOCKED(object); 3638 if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + 3639 size >= object->un_pager.vnp.vnp_size) 3640 bits = VM_PAGE_BITS_ALL; 3641 else 3642 bits = vm_page_bits(base, size); 3643 if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL && 3644 bits != 0) 3645 pmap_remove_all(m); 3646 KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || 3647 !pmap_page_is_mapped(m), 3648 ("vm_page_set_invalid: page %p is mapped", m)); 3649 m->valid &= ~bits; 3650 m->dirty &= ~bits; 3651 } 3652 3653 /* 3654 * vm_page_zero_invalid() 3655 * 3656 * The kernel assumes that the invalid portions of a page contain 3657 * garbage, but such pages can be mapped into memory by user code. 3658 * When this occurs, we must zero out the non-valid portions of the 3659 * page so user code sees what it expects. 3660 * 3661 * Pages are most often semi-valid when the end of a file is mapped 3662 * into memory and the file's size is not page aligned. 3663 */ 3664 void 3665 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 3666 { 3667 int b; 3668 int i; 3669 3670 VM_OBJECT_ASSERT_WLOCKED(m->object); 3671 /* 3672 * Scan the valid bits looking for invalid sections that 3673 * must be zeroed. Invalid sub-DEV_BSIZE'd areas ( where the 3674 * valid bit may be set ) have already been zeroed by 3675 * vm_page_set_validclean(). 3676 */ 3677 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 3678 if (i == (PAGE_SIZE / DEV_BSIZE) || 3679 (m->valid & ((vm_page_bits_t)1 << i))) { 3680 if (i > b) { 3681 pmap_zero_page_area(m, 3682 b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); 3683 } 3684 b = i + 1; 3685 } 3686 } 3687 3688 /* 3689 * setvalid is TRUE when we can safely set the zero'd areas 3690 * as being valid. 
We can do this if there are no cache consistency 3691 * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. 3692 */ 3693 if (setvalid) 3694 m->valid = VM_PAGE_BITS_ALL; 3695 } 3696 3697 /* 3698 * vm_page_is_valid: 3699 * 3700 * Is (partial) page valid? Note that the case where size == 0 3701 * will return FALSE in the degenerate case where the page is 3702 * entirely invalid, and TRUE otherwise. 3703 */ 3704 int 3705 vm_page_is_valid(vm_page_t m, int base, int size) 3706 { 3707 vm_page_bits_t bits; 3708 3709 VM_OBJECT_ASSERT_LOCKED(m->object); 3710 bits = vm_page_bits(base, size); 3711 return (m->valid != 0 && (m->valid & bits) == bits); 3712 } 3713 3714 /* 3715 * Returns true if all of the specified predicates are true for the entire 3716 * (super)page and false otherwise. 3717 */ 3718 bool 3719 vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) 3720 { 3721 vm_object_t object; 3722 int i, npages; 3723 3724 object = m->object; 3725 VM_OBJECT_ASSERT_LOCKED(object); 3726 npages = atop(pagesizes[m->psind]); 3727 3728 /* 3729 * The physically contiguous pages that make up a superpage, i.e., a 3730 * page with a page size index ("psind") greater than zero, will 3731 * occupy adjacent entries in vm_page_array[]. 3732 */ 3733 for (i = 0; i < npages; i++) { 3734 /* Always test object consistency, including "skip_m". */ 3735 if (m[i].object != object) 3736 return (false); 3737 if (&m[i] == skip_m) 3738 continue; 3739 if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) 3740 return (false); 3741 if ((flags & PS_ALL_DIRTY) != 0) { 3742 /* 3743 * Calling vm_page_test_dirty() or pmap_is_modified() 3744 * might stop this case from spuriously returning 3745 * "false". However, that would require a write lock 3746 * on the object containing "m[i]". 3747 */ 3748 if (m[i].dirty != VM_PAGE_BITS_ALL) 3749 return (false); 3750 } 3751 if ((flags & PS_ALL_VALID) != 0 && 3752 m[i].valid != VM_PAGE_BITS_ALL) 3753 return (false); 3754 } 3755 return (true); 3756 } 3757 3758 /* 3759 * Set the page's dirty bits if the page is modified. 3760 */ 3761 void 3762 vm_page_test_dirty(vm_page_t m) 3763 { 3764 3765 VM_OBJECT_ASSERT_WLOCKED(m->object); 3766 if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) 3767 vm_page_dirty(m); 3768 } 3769 3770 void 3771 vm_page_lock_KBI(vm_page_t m, const char *file, int line) 3772 { 3773 3774 mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); 3775 } 3776 3777 void 3778 vm_page_unlock_KBI(vm_page_t m, const char *file, int line) 3779 { 3780 3781 mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); 3782 } 3783 3784 int 3785 vm_page_trylock_KBI(vm_page_t m, const char *file, int line) 3786 { 3787 3788 return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); 3789 } 3790 3791 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) 3792 void 3793 vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) 3794 { 3795 3796 vm_page_lock_assert_KBI(m, MA_OWNED, file, line); 3797 } 3798 3799 void 3800 vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) 3801 { 3802 3803 mtx_assert_(vm_page_lockptr(m), a, file, line); 3804 } 3805 #endif 3806 3807 #ifdef INVARIANTS 3808 void 3809 vm_page_object_lock_assert(vm_page_t m) 3810 { 3811 3812 /* 3813 * Certain of the page's fields may only be modified by the 3814 * holder of the containing object's lock or the exclusive busy 3815 * holder. Unfortunately, the holder of the write busy is 3816 * not recorded, and thus cannot be checked here.
3817 */ 3818 if (m->object != NULL && !vm_page_xbusied(m)) 3819 VM_OBJECT_ASSERT_WLOCKED(m->object); 3820 } 3821 3822 void 3823 vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) 3824 { 3825 3826 if ((bits & PGA_WRITEABLE) == 0) 3827 return; 3828 3829 /* 3830 * The PGA_WRITEABLE flag can only be set if the page is 3831 * managed, is exclusively busied or the object is locked. 3832 * Currently, this flag is only set by pmap_enter(). 3833 */ 3834 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3835 ("PGA_WRITEABLE on unmanaged page")); 3836 if (!vm_page_xbusied(m)) 3837 VM_OBJECT_ASSERT_LOCKED(m->object); 3838 } 3839 #endif 3840 3841 #include "opt_ddb.h" 3842 #ifdef DDB 3843 #include <sys/kernel.h> 3844 3845 #include <ddb/ddb.h> 3846 3847 DB_SHOW_COMMAND(page, vm_page_print_page_info) 3848 { 3849 3850 db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); 3851 db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); 3852 db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); 3853 db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count); 3854 db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); 3855 db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); 3856 db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); 3857 db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); 3858 db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); 3859 } 3860 3861 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 3862 { 3863 int dom; 3864 3865 db_printf("pq_free %d\n", vm_cnt.v_free_count); 3866 for (dom = 0; dom < vm_ndomains; dom++) { 3867 db_printf( 3868 "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", 3869 dom, 3870 vm_dom[dom].vmd_page_count, 3871 vm_dom[dom].vmd_free_count, 3872 vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, 3873 vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, 3874 vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, 3875 vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); 3876 } 3877 } 3878 3879 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) 3880 { 3881 vm_page_t m; 3882 boolean_t phys; 3883 3884 if (!have_addr) { 3885 db_printf("show pginfo addr\n"); 3886 return; 3887 } 3888 3889 phys = strchr(modif, 'p') != NULL; 3890 if (phys) 3891 m = PHYS_TO_VM_PAGE(addr); 3892 else 3893 m = (vm_page_t)addr; 3894 db_printf( 3895 "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" 3896 " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", 3897 m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, 3898 m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, 3899 m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); 3900 } 3901 #endif /* DDB */ 3902