/*-
 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
67 */ 68 69 #include <sys/cdefs.h> 70 __FBSDID("$FreeBSD$"); 71 72 #include "opt_vm.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/counter.h> 77 #include <sys/domainset.h> 78 #include <sys/kernel.h> 79 #include <sys/limits.h> 80 #include <sys/linker.h> 81 #include <sys/lock.h> 82 #include <sys/malloc.h> 83 #include <sys/mman.h> 84 #include <sys/msgbuf.h> 85 #include <sys/mutex.h> 86 #include <sys/proc.h> 87 #include <sys/rwlock.h> 88 #include <sys/sleepqueue.h> 89 #include <sys/sbuf.h> 90 #include <sys/sched.h> 91 #include <sys/smp.h> 92 #include <sys/sysctl.h> 93 #include <sys/vmmeter.h> 94 #include <sys/vnode.h> 95 96 #include <vm/vm.h> 97 #include <vm/pmap.h> 98 #include <vm/vm_param.h> 99 #include <vm/vm_domainset.h> 100 #include <vm/vm_kern.h> 101 #include <vm/vm_map.h> 102 #include <vm/vm_object.h> 103 #include <vm/vm_page.h> 104 #include <vm/vm_pageout.h> 105 #include <vm/vm_phys.h> 106 #include <vm/vm_pagequeue.h> 107 #include <vm/vm_pager.h> 108 #include <vm/vm_radix.h> 109 #include <vm/vm_reserv.h> 110 #include <vm/vm_extern.h> 111 #include <vm/uma.h> 112 #include <vm/uma_int.h> 113 114 #include <machine/md_var.h> 115 116 struct vm_domain vm_dom[MAXMEMDOM]; 117 118 DPCPU_DEFINE_STATIC(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]); 119 120 struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; 121 122 struct mtx_padalign __exclusive_cache_line vm_domainset_lock; 123 /* The following fields are protected by the domainset lock. */ 124 domainset_t __exclusive_cache_line vm_min_domains; 125 domainset_t __exclusive_cache_line vm_severe_domains; 126 static int vm_min_waiters; 127 static int vm_severe_waiters; 128 static int vm_pageproc_waiters; 129 130 static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD, 0, 131 "VM page statistics"); 132 133 static counter_u64_t pqstate_commit_retries = EARLY_COUNTER; 134 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, pqstate_commit_retries, 135 CTLFLAG_RD, &pqstate_commit_retries, 136 "Number of failed per-page atomic queue state updates"); 137 138 static counter_u64_t queue_ops = EARLY_COUNTER; 139 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops, 140 CTLFLAG_RD, &queue_ops, 141 "Number of batched queue operations"); 142 143 static counter_u64_t queue_nops = EARLY_COUNTER; 144 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_nops, 145 CTLFLAG_RD, &queue_nops, 146 "Number of batched queue operations with no effects"); 147 148 static void 149 counter_startup(void) 150 { 151 152 pqstate_commit_retries = counter_u64_alloc(M_WAITOK); 153 queue_ops = counter_u64_alloc(M_WAITOK); 154 queue_nops = counter_u64_alloc(M_WAITOK); 155 } 156 SYSINIT(page_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); 157 158 /* 159 * bogus page -- for I/O to/from partially complete buffers, 160 * or for paging into sparsely invalid regions. 
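 *
 * As an illustrative sketch (not code from this file): a pager assembling a
 * contiguous read may substitute bogus_page for pages in the run that are
 * already valid, so the transfer can cover the whole range without
 * clobbering their contents, e.g.
 *
 *	for (i = 0; i < bp->b_npages; i++)
 *		if (vm_page_all_valid(bp->b_pages[i]))
 *			bp->b_pages[i] = bogus_page;
 *
 * with the completion path pointing b_pages[i] back at the real pages
 * afterwards; the buffer cache and vnode pager are the canonical users.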
161 */ 162 vm_page_t bogus_page; 163 164 vm_page_t vm_page_array; 165 long vm_page_array_size; 166 long first_page; 167 168 static TAILQ_HEAD(, vm_page) blacklist_head; 169 static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); 170 SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | 171 CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); 172 173 static uma_zone_t fakepg_zone; 174 175 static void vm_page_alloc_check(vm_page_t m); 176 static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, 177 vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked); 178 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); 179 static void vm_page_enqueue(vm_page_t m, uint8_t queue); 180 static bool vm_page_free_prep(vm_page_t m); 181 static void vm_page_free_toq(vm_page_t m); 182 static void vm_page_init(void *dummy); 183 static int vm_page_insert_after(vm_page_t m, vm_object_t object, 184 vm_pindex_t pindex, vm_page_t mpred); 185 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, 186 vm_page_t mpred); 187 static void vm_page_mvqueue(vm_page_t m, const uint8_t queue, 188 const uint16_t nflag); 189 static int vm_page_reclaim_run(int req_class, int domain, u_long npages, 190 vm_page_t m_run, vm_paddr_t high); 191 static void vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse); 192 static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, 193 int req); 194 static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, 195 int flags); 196 static void vm_page_zone_release(void *arg, void **store, int cnt); 197 198 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); 199 200 static void 201 vm_page_init(void *dummy) 202 { 203 204 fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, 205 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); 206 bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | 207 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 208 } 209 210 /* 211 * The cache page zone is initialized later since we need to be able to allocate 212 * pages before UMA is fully initialized. 213 */ 214 static void 215 vm_page_init_cache_zones(void *dummy __unused) 216 { 217 struct vm_domain *vmd; 218 struct vm_pgcache *pgcache; 219 int cache, domain, maxcache, pool; 220 221 maxcache = 0; 222 TUNABLE_INT_FETCH("vm.pgcache_zone_max_pcpu", &maxcache); 223 maxcache *= mp_ncpus; 224 for (domain = 0; domain < vm_ndomains; domain++) { 225 vmd = VM_DOMAIN(domain); 226 for (pool = 0; pool < VM_NFREEPOOL; pool++) { 227 pgcache = &vmd->vmd_pgcache[pool]; 228 pgcache->domain = domain; 229 pgcache->pool = pool; 230 pgcache->zone = uma_zcache_create("vm pgcache", 231 PAGE_SIZE, NULL, NULL, NULL, NULL, 232 vm_page_zone_import, vm_page_zone_release, pgcache, 233 UMA_ZONE_VM); 234 235 /* 236 * Limit each pool's zone to 0.1% of the pages in the 237 * domain. 238 */ 239 cache = maxcache != 0 ? maxcache : 240 vmd->vmd_page_count / 1000; 241 uma_zone_set_maxcache(pgcache->zone, cache); 242 } 243 } 244 } 245 SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL); 246 247 /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ 248 #if PAGE_SIZE == 32768 249 #ifdef CTASSERT 250 CTASSERT(sizeof(u_long) >= 8); 251 #endif 252 #endif 253 254 /* 255 * vm_set_page_size: 256 * 257 * Sets the page size, perhaps based upon the memory 258 * size. Must be called before any use of page-size 259 * dependent functions. 
260 */ 261 void 262 vm_set_page_size(void) 263 { 264 if (vm_cnt.v_page_size == 0) 265 vm_cnt.v_page_size = PAGE_SIZE; 266 if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) 267 panic("vm_set_page_size: page size not a power of two"); 268 } 269 270 /* 271 * vm_page_blacklist_next: 272 * 273 * Find the next entry in the provided string of blacklist 274 * addresses. Entries are separated by space, comma, or newline. 275 * If an invalid integer is encountered then the rest of the 276 * string is skipped. Updates the list pointer to the next 277 * character, or NULL if the string is exhausted or invalid. 278 */ 279 static vm_paddr_t 280 vm_page_blacklist_next(char **list, char *end) 281 { 282 vm_paddr_t bad; 283 char *cp, *pos; 284 285 if (list == NULL || *list == NULL) 286 return (0); 287 if (**list =='\0') { 288 *list = NULL; 289 return (0); 290 } 291 292 /* 293 * If there's no end pointer then the buffer is coming from 294 * the kenv and we know it's null-terminated. 295 */ 296 if (end == NULL) 297 end = *list + strlen(*list); 298 299 /* Ensure that strtoq() won't walk off the end */ 300 if (*end != '\0') { 301 if (*end == '\n' || *end == ' ' || *end == ',') 302 *end = '\0'; 303 else { 304 printf("Blacklist not terminated, skipping\n"); 305 *list = NULL; 306 return (0); 307 } 308 } 309 310 for (pos = *list; *pos != '\0'; pos = cp) { 311 bad = strtoq(pos, &cp, 0); 312 if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { 313 if (bad == 0) { 314 if (++cp < end) 315 continue; 316 else 317 break; 318 } 319 } else 320 break; 321 if (*cp == '\0' || ++cp >= end) 322 *list = NULL; 323 else 324 *list = cp; 325 return (trunc_page(bad)); 326 } 327 printf("Garbage in RAM blacklist, skipping\n"); 328 *list = NULL; 329 return (0); 330 } 331 332 bool 333 vm_page_blacklist_add(vm_paddr_t pa, bool verbose) 334 { 335 struct vm_domain *vmd; 336 vm_page_t m; 337 int ret; 338 339 m = vm_phys_paddr_to_vm_page(pa); 340 if (m == NULL) 341 return (true); /* page does not exist, no failure */ 342 343 vmd = vm_pagequeue_domain(m); 344 vm_domain_free_lock(vmd); 345 ret = vm_phys_unfree_page(m); 346 vm_domain_free_unlock(vmd); 347 if (ret != 0) { 348 vm_domain_freecnt_inc(vmd, -1); 349 TAILQ_INSERT_TAIL(&blacklist_head, m, listq); 350 if (verbose) 351 printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); 352 } 353 return (ret); 354 } 355 356 /* 357 * vm_page_blacklist_check: 358 * 359 * Iterate through the provided string of blacklist addresses, pulling 360 * each entry out of the physical allocator free list and putting it 361 * onto a list for reporting via the vm.page_blacklist sysctl. 362 */ 363 static void 364 vm_page_blacklist_check(char *list, char *end) 365 { 366 vm_paddr_t pa; 367 char *next; 368 369 next = list; 370 while (next != NULL) { 371 if ((pa = vm_page_blacklist_next(&next, end)) == 0) 372 continue; 373 vm_page_blacklist_add(pa, bootverbose); 374 } 375 } 376 377 /* 378 * vm_page_blacklist_load: 379 * 380 * Search for a special module named "ram_blacklist". It'll be a 381 * plain text file provided by the user via the loader directive 382 * of the same name. 
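 *
 * For example (illustrative names, following the usual loader.conf
 * conventions for preloading a file of a given type), a user might add
 *
 *	blacklist_load="YES"
 *	blacklist_name="/boot/blacklist.txt"
 *	blacklist_type="ram_blacklist"
 *
 * where the file lists physical addresses separated by spaces, commas, or
 * newlines, e.g. "0x18fa3000 0x18fa4000".  The same format is accepted from
 * the vm.blacklist kernel environment variable checked in vm_page_startup().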
383 */ 384 static void 385 vm_page_blacklist_load(char **list, char **end) 386 { 387 void *mod; 388 u_char *ptr; 389 u_int len; 390 391 mod = NULL; 392 ptr = NULL; 393 394 mod = preload_search_by_type("ram_blacklist"); 395 if (mod != NULL) { 396 ptr = preload_fetch_addr(mod); 397 len = preload_fetch_size(mod); 398 } 399 *list = ptr; 400 if (ptr != NULL) 401 *end = ptr + len; 402 else 403 *end = NULL; 404 return; 405 } 406 407 static int 408 sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) 409 { 410 vm_page_t m; 411 struct sbuf sbuf; 412 int error, first; 413 414 first = 1; 415 error = sysctl_wire_old_buffer(req, 0); 416 if (error != 0) 417 return (error); 418 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 419 TAILQ_FOREACH(m, &blacklist_head, listq) { 420 sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", 421 (uintmax_t)m->phys_addr); 422 first = 0; 423 } 424 error = sbuf_finish(&sbuf); 425 sbuf_delete(&sbuf); 426 return (error); 427 } 428 429 /* 430 * Initialize a dummy page for use in scans of the specified paging queue. 431 * In principle, this function only needs to set the flag PG_MARKER. 432 * Nonetheless, it write busies the page as a safety precaution. 433 */ 434 static void 435 vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags) 436 { 437 438 bzero(marker, sizeof(*marker)); 439 marker->flags = PG_MARKER; 440 marker->a.flags = aflags; 441 marker->busy_lock = VPB_CURTHREAD_EXCLUSIVE; 442 marker->a.queue = queue; 443 } 444 445 static void 446 vm_page_domain_init(int domain) 447 { 448 struct vm_domain *vmd; 449 struct vm_pagequeue *pq; 450 int i; 451 452 vmd = VM_DOMAIN(domain); 453 bzero(vmd, sizeof(*vmd)); 454 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = 455 "vm inactive pagequeue"; 456 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = 457 "vm active pagequeue"; 458 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = 459 "vm laundry pagequeue"; 460 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = 461 "vm unswappable pagequeue"; 462 vmd->vmd_domain = domain; 463 vmd->vmd_page_count = 0; 464 vmd->vmd_free_count = 0; 465 vmd->vmd_segs = 0; 466 vmd->vmd_oom = FALSE; 467 for (i = 0; i < PQ_COUNT; i++) { 468 pq = &vmd->vmd_pagequeues[i]; 469 TAILQ_INIT(&pq->pq_pl); 470 mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", 471 MTX_DEF | MTX_DUPOK); 472 pq->pq_pdpages = 0; 473 vm_page_init_marker(&vmd->vmd_markers[i], i, 0); 474 } 475 mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF); 476 mtx_init(&vmd->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF); 477 snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain); 478 479 /* 480 * inacthead is used to provide FIFO ordering for LRU-bypassing 481 * insertions. 482 */ 483 vm_page_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE, PGA_ENQUEUED); 484 TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl, 485 &vmd->vmd_inacthead, plinks.q); 486 487 /* 488 * The clock pages are used to implement active queue scanning without 489 * requeues. Scans start at clock[0], which is advanced after the scan 490 * ends. When the two clock hands meet, they are reset and scanning 491 * resumes from the head of the queue. 
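 *
 * Conceptually, the scan walks the queue between the two hands; a
 * simplified sketch of what the page daemon does (the real code in
 * vm_pageout.c works in batches and drops the queue lock periodically):
 *
 *	m = TAILQ_NEXT(&vmd->vmd_clock[0], plinks.q);
 *	while (m != &vmd->vmd_clock[1] && target > 0) {
 *		next = TAILQ_NEXT(m, plinks.q);
 *		if ((m->flags & PG_MARKER) == 0)
 *			... examine the page and age its act_count ...
 *		m = next;
 *	}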
492 */ 493 vm_page_init_marker(&vmd->vmd_clock[0], PQ_ACTIVE, PGA_ENQUEUED); 494 vm_page_init_marker(&vmd->vmd_clock[1], PQ_ACTIVE, PGA_ENQUEUED); 495 TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl, 496 &vmd->vmd_clock[0], plinks.q); 497 TAILQ_INSERT_TAIL(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl, 498 &vmd->vmd_clock[1], plinks.q); 499 } 500 501 /* 502 * Initialize a physical page in preparation for adding it to the free 503 * lists. 504 */ 505 static void 506 vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) 507 { 508 509 m->object = NULL; 510 m->ref_count = 0; 511 m->busy_lock = VPB_FREED; 512 m->flags = m->a.flags = 0; 513 m->phys_addr = pa; 514 m->a.queue = PQ_NONE; 515 m->psind = 0; 516 m->segind = segind; 517 m->order = VM_NFREEORDER; 518 m->pool = VM_FREEPOOL_DEFAULT; 519 m->valid = m->dirty = 0; 520 pmap_page_init(m); 521 } 522 523 #ifndef PMAP_HAS_PAGE_ARRAY 524 static vm_paddr_t 525 vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range) 526 { 527 vm_paddr_t new_end; 528 529 /* 530 * Reserve an unmapped guard page to trap access to vm_page_array[-1]. 531 * However, because this page is allocated from KVM, out-of-bounds 532 * accesses using the direct map will not be trapped. 533 */ 534 *vaddr += PAGE_SIZE; 535 536 /* 537 * Allocate physical memory for the page structures, and map it. 538 */ 539 new_end = trunc_page(end - page_range * sizeof(struct vm_page)); 540 vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end, 541 VM_PROT_READ | VM_PROT_WRITE); 542 vm_page_array_size = page_range; 543 544 return (new_end); 545 } 546 #endif 547 548 /* 549 * vm_page_startup: 550 * 551 * Initializes the resident memory module. Allocates physical memory for 552 * bootstrapping UMA and some data structures that are used to manage 553 * physical pages. Initializes these structures, and populates the free 554 * page queues. 555 */ 556 vm_offset_t 557 vm_page_startup(vm_offset_t vaddr) 558 { 559 struct vm_phys_seg *seg; 560 vm_page_t m; 561 char *list, *listend; 562 vm_paddr_t end, high_avail, low_avail, new_end, size; 563 vm_paddr_t page_range __unused; 564 vm_paddr_t last_pa, pa; 565 u_long pagecount; 566 int biggestone, i, segind; 567 #ifdef WITNESS 568 vm_offset_t mapped; 569 int witness_size; 570 #endif 571 #if defined(__i386__) && defined(VM_PHYSSEG_DENSE) 572 long ii; 573 #endif 574 575 vaddr = round_page(vaddr); 576 577 vm_phys_early_startup(); 578 biggestone = vm_phys_avail_largest(); 579 end = phys_avail[biggestone+1]; 580 581 /* 582 * Initialize the page and queue locks. 583 */ 584 mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF); 585 for (i = 0; i < PA_LOCK_COUNT; i++) 586 mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); 587 for (i = 0; i < vm_ndomains; i++) 588 vm_page_domain_init(i); 589 590 new_end = end; 591 #ifdef WITNESS 592 witness_size = round_page(witness_startup_count()); 593 new_end -= witness_size; 594 mapped = pmap_map(&vaddr, new_end, new_end + witness_size, 595 VM_PROT_READ | VM_PROT_WRITE); 596 bzero((void *)mapped, witness_size); 597 witness_startup((void *)mapped); 598 #endif 599 600 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 601 defined(__i386__) || defined(__mips__) || defined(__riscv) || \ 602 defined(__powerpc64__) 603 /* 604 * Allocate a bitmap to indicate that a random physical page 605 * needs to be included in a minidump. 606 * 607 * The amd64 port needs this to indicate which direct map pages 608 * need to be dumped, via calls to dump_add_page()/dump_drop_page(). 
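 *
 * The bitmap holds one bit per physical page, indexed by pa >> PAGE_SHIFT:
 * dump_add_page(pa) atomically sets that bit and dump_drop_page(pa) clears
 * it (the actual helpers are machine-dependent and may differ in detail).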
609 * 610 * However, i386 still needs this workspace internally within the 611 * minidump code. In theory, they are not needed on i386, but are 612 * included should the sf_buf code decide to use them. 613 */ 614 last_pa = 0; 615 for (i = 0; dump_avail[i + 1] != 0; i += 2) 616 if (dump_avail[i + 1] > last_pa) 617 last_pa = dump_avail[i + 1]; 618 page_range = last_pa / PAGE_SIZE; 619 vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); 620 new_end -= vm_page_dump_size; 621 vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, 622 new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); 623 bzero((void *)vm_page_dump, vm_page_dump_size); 624 #else 625 (void)last_pa; 626 #endif 627 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ 628 defined(__riscv) || defined(__powerpc64__) 629 /* 630 * Include the UMA bootstrap pages, witness pages and vm_page_dump 631 * in a crash dump. When pmap_map() uses the direct map, they are 632 * not automatically included. 633 */ 634 for (pa = new_end; pa < end; pa += PAGE_SIZE) 635 dump_add_page(pa); 636 #endif 637 phys_avail[biggestone + 1] = new_end; 638 #ifdef __amd64__ 639 /* 640 * Request that the physical pages underlying the message buffer be 641 * included in a crash dump. Since the message buffer is accessed 642 * through the direct map, they are not automatically included. 643 */ 644 pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); 645 last_pa = pa + round_page(msgbufsize); 646 while (pa < last_pa) { 647 dump_add_page(pa); 648 pa += PAGE_SIZE; 649 } 650 #endif 651 /* 652 * Compute the number of pages of memory that will be available for 653 * use, taking into account the overhead of a page structure per page. 654 * In other words, solve 655 * "available physical memory" - round_page(page_range * 656 * sizeof(struct vm_page)) = page_range * PAGE_SIZE 657 * for page_range. 658 */ 659 low_avail = phys_avail[0]; 660 high_avail = phys_avail[1]; 661 for (i = 0; i < vm_phys_nsegs; i++) { 662 if (vm_phys_segs[i].start < low_avail) 663 low_avail = vm_phys_segs[i].start; 664 if (vm_phys_segs[i].end > high_avail) 665 high_avail = vm_phys_segs[i].end; 666 } 667 /* Skip the first chunk. It is already accounted for. */ 668 for (i = 2; phys_avail[i + 1] != 0; i += 2) { 669 if (phys_avail[i] < low_avail) 670 low_avail = phys_avail[i]; 671 if (phys_avail[i + 1] > high_avail) 672 high_avail = phys_avail[i + 1]; 673 } 674 first_page = low_avail / PAGE_SIZE; 675 #ifdef VM_PHYSSEG_SPARSE 676 size = 0; 677 for (i = 0; i < vm_phys_nsegs; i++) 678 size += vm_phys_segs[i].end - vm_phys_segs[i].start; 679 for (i = 0; phys_avail[i + 1] != 0; i += 2) 680 size += phys_avail[i + 1] - phys_avail[i]; 681 #elif defined(VM_PHYSSEG_DENSE) 682 size = high_avail - low_avail; 683 #else 684 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." 685 #endif 686 687 #ifdef PMAP_HAS_PAGE_ARRAY 688 pmap_page_array_startup(size / PAGE_SIZE); 689 biggestone = vm_phys_avail_largest(); 690 end = new_end = phys_avail[biggestone + 1]; 691 #else 692 #ifdef VM_PHYSSEG_DENSE 693 /* 694 * In the VM_PHYSSEG_DENSE case, the number of pages can account for 695 * the overhead of a page structure per page only if vm_page_array is 696 * allocated from the last physical memory chunk. Otherwise, we must 697 * allocate page structures representing the physical memory 698 * underlying vm_page_array, even though they will not be used. 
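 *
 * As a worked example (PAGE_SIZE = 4096 and, purely for illustration,
 * sizeof(struct vm_page) == 104): 1 GiB of available memory gives
 * page_range = 2^30 / (4096 + 104), roughly 255,650 usable pages rather
 * than the raw 262,144 frames, with the difference (about 25 MiB) consumed
 * by the page array itself.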
699 */ 700 if (new_end != high_avail) 701 page_range = size / PAGE_SIZE; 702 else 703 #endif 704 { 705 page_range = size / (PAGE_SIZE + sizeof(struct vm_page)); 706 707 /* 708 * If the partial bytes remaining are large enough for 709 * a page (PAGE_SIZE) without a corresponding 710 * 'struct vm_page', then new_end will contain an 711 * extra page after subtracting the length of the VM 712 * page array. Compensate by subtracting an extra 713 * page from new_end. 714 */ 715 if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) { 716 if (new_end == high_avail) 717 high_avail -= PAGE_SIZE; 718 new_end -= PAGE_SIZE; 719 } 720 } 721 end = new_end; 722 new_end = vm_page_array_alloc(&vaddr, end, page_range); 723 #endif 724 725 #if VM_NRESERVLEVEL > 0 726 /* 727 * Allocate physical memory for the reservation management system's 728 * data structures, and map it. 729 */ 730 new_end = vm_reserv_startup(&vaddr, new_end); 731 #endif 732 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ 733 defined(__riscv) || defined(__powerpc64__) 734 /* 735 * Include vm_page_array and vm_reserv_array in a crash dump. 736 */ 737 for (pa = new_end; pa < end; pa += PAGE_SIZE) 738 dump_add_page(pa); 739 #endif 740 phys_avail[biggestone + 1] = new_end; 741 742 /* 743 * Add physical memory segments corresponding to the available 744 * physical pages. 745 */ 746 for (i = 0; phys_avail[i + 1] != 0; i += 2) 747 if (vm_phys_avail_size(i) != 0) 748 vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); 749 750 /* 751 * Initialize the physical memory allocator. 752 */ 753 vm_phys_init(); 754 755 /* 756 * Initialize the page structures and add every available page to the 757 * physical memory allocator's free lists. 758 */ 759 #if defined(__i386__) && defined(VM_PHYSSEG_DENSE) 760 for (ii = 0; ii < vm_page_array_size; ii++) { 761 m = &vm_page_array[ii]; 762 vm_page_init_page(m, (first_page + ii) << PAGE_SHIFT, 0); 763 m->flags = PG_FICTITIOUS; 764 } 765 #endif 766 vm_cnt.v_page_count = 0; 767 for (segind = 0; segind < vm_phys_nsegs; segind++) { 768 seg = &vm_phys_segs[segind]; 769 for (m = seg->first_page, pa = seg->start; pa < seg->end; 770 m++, pa += PAGE_SIZE) 771 vm_page_init_page(m, pa, segind); 772 773 /* 774 * Add the segment to the free lists only if it is covered by 775 * one of the ranges in phys_avail. Because we've added the 776 * ranges to the vm_phys_segs array, we can assume that each 777 * segment is either entirely contained in one of the ranges, 778 * or doesn't overlap any of them. 779 */ 780 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 781 struct vm_domain *vmd; 782 783 if (seg->start < phys_avail[i] || 784 seg->end > phys_avail[i + 1]) 785 continue; 786 787 m = seg->first_page; 788 pagecount = (u_long)atop(seg->end - seg->start); 789 790 vmd = VM_DOMAIN(seg->domain); 791 vm_domain_free_lock(vmd); 792 vm_phys_enqueue_contig(m, pagecount); 793 vm_domain_free_unlock(vmd); 794 vm_domain_freecnt_inc(vmd, pagecount); 795 vm_cnt.v_page_count += (u_int)pagecount; 796 797 vmd = VM_DOMAIN(seg->domain); 798 vmd->vmd_page_count += (u_int)pagecount; 799 vmd->vmd_segs |= 1UL << m->segind; 800 break; 801 } 802 } 803 804 /* 805 * Remove blacklisted pages from the physical memory allocator. 
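 *
 * The pages removed here remain visible afterwards through the
 * vm.page_blacklist sysctl defined above; "sysctl vm.page_blacklist"
 * prints the blacklisted physical addresses as a comma-separated list.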
806 */ 807 TAILQ_INIT(&blacklist_head); 808 vm_page_blacklist_load(&list, &listend); 809 vm_page_blacklist_check(list, listend); 810 811 list = kern_getenv("vm.blacklist"); 812 vm_page_blacklist_check(list, NULL); 813 814 freeenv(list); 815 #if VM_NRESERVLEVEL > 0 816 /* 817 * Initialize the reservation management system. 818 */ 819 vm_reserv_init(); 820 #endif 821 822 return (vaddr); 823 } 824 825 void 826 vm_page_reference(vm_page_t m) 827 { 828 829 vm_page_aflag_set(m, PGA_REFERENCED); 830 } 831 832 static bool 833 vm_page_acquire_flags(vm_page_t m, int allocflags) 834 { 835 bool locked; 836 837 if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0) 838 locked = vm_page_trysbusy(m); 839 else 840 locked = vm_page_tryxbusy(m); 841 if (locked && (allocflags & VM_ALLOC_WIRED) != 0) 842 vm_page_wire(m); 843 return (locked); 844 } 845 846 /* 847 * vm_page_busy_sleep_flags 848 * 849 * Sleep for busy according to VM_ALLOC_ parameters. Returns true 850 * if the caller should retry and false otherwise. 851 */ 852 static bool 853 vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg, 854 int allocflags) 855 { 856 857 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 858 return (false); 859 860 /* 861 * Reference the page before unlocking and sleeping so that 862 * the page daemon is less likely to reclaim it. 863 */ 864 if ((allocflags & VM_ALLOC_NOCREAT) == 0) 865 vm_page_reference(m); 866 867 if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags, true)) 868 VM_OBJECT_WLOCK(object); 869 if ((allocflags & VM_ALLOC_WAITFAIL) != 0) 870 return (false); 871 872 return (true); 873 } 874 875 /* 876 * vm_page_busy_acquire: 877 * 878 * Acquire the busy lock as described by VM_ALLOC_* flags. Will loop 879 * and drop the object lock if necessary. 880 */ 881 bool 882 vm_page_busy_acquire(vm_page_t m, int allocflags) 883 { 884 vm_object_t obj; 885 bool locked; 886 887 /* 888 * The page-specific object must be cached because page 889 * identity can change during the sleep, causing the 890 * re-lock of a different object. 891 * It is assumed that a reference to the object is already 892 * held by the callers. 893 */ 894 obj = m->object; 895 for (;;) { 896 if (vm_page_acquire_flags(m, allocflags)) 897 return (true); 898 if ((allocflags & VM_ALLOC_NOWAIT) != 0) 899 return (false); 900 if (obj != NULL) 901 locked = VM_OBJECT_WOWNED(obj); 902 else 903 locked = false; 904 MPASS(locked || vm_page_wired(m)); 905 if (_vm_page_busy_sleep(obj, m, m->pindex, "vmpba", allocflags, 906 locked) && locked) 907 VM_OBJECT_WLOCK(obj); 908 if ((allocflags & VM_ALLOC_WAITFAIL) != 0) 909 return (false); 910 KASSERT(m->object == obj || m->object == NULL, 911 ("vm_page_busy_acquire: page %p does not belong to %p", 912 m, obj)); 913 } 914 } 915 916 /* 917 * vm_page_busy_downgrade: 918 * 919 * Downgrade an exclusive busy page into a single shared busy page. 920 */ 921 void 922 vm_page_busy_downgrade(vm_page_t m) 923 { 924 u_int x; 925 926 vm_page_assert_xbusied(m); 927 928 x = m->busy_lock; 929 for (;;) { 930 if (atomic_fcmpset_rel_int(&m->busy_lock, 931 &x, VPB_SHARERS_WORD(1))) 932 break; 933 } 934 if ((x & VPB_BIT_WAITERS) != 0) 935 wakeup(m); 936 } 937 938 /* 939 * 940 * vm_page_busy_tryupgrade: 941 * 942 * Attempt to upgrade a single shared busy into an exclusive busy. 
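 *
 * A typical caller pattern (an illustrative sketch, not from this file):
 * try the upgrade while holding the shared busy and fall back to dropping
 * it if another sharer exists, e.g.
 *
 *	if (vm_page_busy_tryupgrade(m) == 0) {
 *		vm_page_sunbusy(m);
 *		... re-lookup the page and busy it exclusively, or give up ...
 *	}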
943 */ 944 int 945 vm_page_busy_tryupgrade(vm_page_t m) 946 { 947 u_int ce, x; 948 949 vm_page_assert_sbusied(m); 950 951 x = m->busy_lock; 952 ce = VPB_CURTHREAD_EXCLUSIVE; 953 for (;;) { 954 if (VPB_SHARERS(x) > 1) 955 return (0); 956 KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1), 957 ("vm_page_busy_tryupgrade: invalid lock state")); 958 if (!atomic_fcmpset_acq_int(&m->busy_lock, &x, 959 ce | (x & VPB_BIT_WAITERS))) 960 continue; 961 return (1); 962 } 963 } 964 965 /* 966 * vm_page_sbusied: 967 * 968 * Return a positive value if the page is shared busied, 0 otherwise. 969 */ 970 int 971 vm_page_sbusied(vm_page_t m) 972 { 973 u_int x; 974 975 x = m->busy_lock; 976 return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); 977 } 978 979 /* 980 * vm_page_sunbusy: 981 * 982 * Shared unbusy a page. 983 */ 984 void 985 vm_page_sunbusy(vm_page_t m) 986 { 987 u_int x; 988 989 vm_page_assert_sbusied(m); 990 991 x = m->busy_lock; 992 for (;;) { 993 KASSERT(x != VPB_FREED, 994 ("vm_page_sunbusy: Unlocking freed page.")); 995 if (VPB_SHARERS(x) > 1) { 996 if (atomic_fcmpset_int(&m->busy_lock, &x, 997 x - VPB_ONE_SHARER)) 998 break; 999 continue; 1000 } 1001 KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1), 1002 ("vm_page_sunbusy: invalid lock state")); 1003 if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED)) 1004 continue; 1005 if ((x & VPB_BIT_WAITERS) == 0) 1006 break; 1007 wakeup(m); 1008 break; 1009 } 1010 } 1011 1012 /* 1013 * vm_page_busy_sleep: 1014 * 1015 * Sleep if the page is busy, using the page pointer as wchan. 1016 * This is used to implement the hard-path of busying mechanism. 1017 * 1018 * If nonshared is true, sleep only if the page is xbusy. 1019 * 1020 * The object lock must be held on entry and will be released on exit. 1021 */ 1022 void 1023 vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) 1024 { 1025 vm_object_t obj; 1026 1027 obj = m->object; 1028 VM_OBJECT_ASSERT_LOCKED(obj); 1029 vm_page_lock_assert(m, MA_NOTOWNED); 1030 1031 if (!_vm_page_busy_sleep(obj, m, m->pindex, wmesg, 1032 nonshared ? VM_ALLOC_SBUSY : 0 , true)) 1033 VM_OBJECT_DROP(obj); 1034 } 1035 1036 /* 1037 * vm_page_busy_sleep_unlocked: 1038 * 1039 * Sleep if the page is busy, using the page pointer as wchan. 1040 * This is used to implement the hard-path of busying mechanism. 1041 * 1042 * If nonshared is true, sleep only if the page is xbusy. 1043 * 1044 * The object lock must not be held on entry. The operation will 1045 * return if the page changes identity. 1046 */ 1047 void 1048 vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, 1049 const char *wmesg, bool nonshared) 1050 { 1051 1052 VM_OBJECT_ASSERT_UNLOCKED(obj); 1053 vm_page_lock_assert(m, MA_NOTOWNED); 1054 1055 _vm_page_busy_sleep(obj, m, pindex, wmesg, 1056 nonshared ? VM_ALLOC_SBUSY : 0, false); 1057 } 1058 1059 /* 1060 * _vm_page_busy_sleep: 1061 * 1062 * Internal busy sleep function. Verifies the page identity and 1063 * lockstate against parameters. Returns true if it sleeps and 1064 * false otherwise. 1065 * 1066 * If locked is true the lock will be dropped for any true returns 1067 * and held for any false returns. 1068 */ 1069 static bool 1070 _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, 1071 const char *wmesg, int allocflags, bool locked) 1072 { 1073 bool xsleep; 1074 u_int x; 1075 1076 /* 1077 * If the object is busy we must wait for that to drain to zero 1078 * before trying the page again. 
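 *
 * For reference, the usual consumer pattern built on this sleep primitive
 * is lookup, try-busy, sleep, retry; a sketch (vm_page_busy_sleep() leaves
 * the object lock released, so it must be retaken before retrying; the
 * wait message "pgbusy" is arbitrary):
 *
 *	VM_OBJECT_WLOCK(object);
 * retry:
 *	m = vm_page_lookup(object, pindex);
 *	if (m != NULL && !vm_page_tryxbusy(m)) {
 *		vm_page_busy_sleep(m, "pgbusy", false);
 *		VM_OBJECT_WLOCK(object);
 *		goto retry;
 *	}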
1079 */ 1080 if (obj != NULL && vm_object_busied(obj)) { 1081 if (locked) 1082 VM_OBJECT_DROP(obj); 1083 vm_object_busy_wait(obj, wmesg); 1084 return (true); 1085 } 1086 1087 if (!vm_page_busied(m)) 1088 return (false); 1089 1090 xsleep = (allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0; 1091 sleepq_lock(m); 1092 x = atomic_load_int(&m->busy_lock); 1093 do { 1094 /* 1095 * If the page changes objects or becomes unlocked we can 1096 * simply return. 1097 */ 1098 if (x == VPB_UNBUSIED || 1099 (xsleep && (x & VPB_BIT_SHARED) != 0) || 1100 m->object != obj || m->pindex != pindex) { 1101 sleepq_release(m); 1102 return (false); 1103 } 1104 if ((x & VPB_BIT_WAITERS) != 0) 1105 break; 1106 } while (!atomic_fcmpset_int(&m->busy_lock, &x, x | VPB_BIT_WAITERS)); 1107 if (locked) 1108 VM_OBJECT_DROP(obj); 1109 DROP_GIANT(); 1110 sleepq_add(m, NULL, wmesg, 0, 0); 1111 sleepq_wait(m, PVM); 1112 PICKUP_GIANT(); 1113 return (true); 1114 } 1115 1116 /* 1117 * vm_page_trysbusy: 1118 * 1119 * Try to shared busy a page. 1120 * If the operation succeeds 1 is returned otherwise 0. 1121 * The operation never sleeps. 1122 */ 1123 int 1124 vm_page_trysbusy(vm_page_t m) 1125 { 1126 vm_object_t obj; 1127 u_int x; 1128 1129 obj = m->object; 1130 x = m->busy_lock; 1131 for (;;) { 1132 if ((x & VPB_BIT_SHARED) == 0) 1133 return (0); 1134 /* 1135 * Reduce the window for transient busies that will trigger 1136 * false negatives in vm_page_ps_test(). 1137 */ 1138 if (obj != NULL && vm_object_busied(obj)) 1139 return (0); 1140 if (atomic_fcmpset_acq_int(&m->busy_lock, &x, 1141 x + VPB_ONE_SHARER)) 1142 break; 1143 } 1144 1145 /* Refetch the object now that we're guaranteed that it is stable. */ 1146 obj = m->object; 1147 if (obj != NULL && vm_object_busied(obj)) { 1148 vm_page_sunbusy(m); 1149 return (0); 1150 } 1151 return (1); 1152 } 1153 1154 /* 1155 * vm_page_tryxbusy: 1156 * 1157 * Try to exclusive busy a page. 1158 * If the operation succeeds 1 is returned otherwise 0. 1159 * The operation never sleeps. 1160 */ 1161 int 1162 vm_page_tryxbusy(vm_page_t m) 1163 { 1164 vm_object_t obj; 1165 1166 if (atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED, 1167 VPB_CURTHREAD_EXCLUSIVE) == 0) 1168 return (0); 1169 1170 obj = m->object; 1171 if (obj != NULL && vm_object_busied(obj)) { 1172 vm_page_xunbusy(m); 1173 return (0); 1174 } 1175 return (1); 1176 } 1177 1178 static void 1179 vm_page_xunbusy_hard_tail(vm_page_t m) 1180 { 1181 atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); 1182 /* Wake the waiter. */ 1183 wakeup(m); 1184 } 1185 1186 /* 1187 * vm_page_xunbusy_hard: 1188 * 1189 * Called when unbusy has failed because there is a waiter. 1190 */ 1191 void 1192 vm_page_xunbusy_hard(vm_page_t m) 1193 { 1194 vm_page_assert_xbusied(m); 1195 vm_page_xunbusy_hard_tail(m); 1196 } 1197 1198 void 1199 vm_page_xunbusy_hard_unchecked(vm_page_t m) 1200 { 1201 vm_page_assert_xbusied_unchecked(m); 1202 vm_page_xunbusy_hard_tail(m); 1203 } 1204 1205 static void 1206 vm_page_busy_free(vm_page_t m) 1207 { 1208 u_int x; 1209 1210 atomic_thread_fence_rel(); 1211 x = atomic_swap_int(&m->busy_lock, VPB_FREED); 1212 if ((x & VPB_BIT_WAITERS) != 0) 1213 wakeup(m); 1214 } 1215 1216 /* 1217 * vm_page_unhold_pages: 1218 * 1219 * Unhold each of the pages that is referenced by the given array. 
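 *
 * The usual pairing (a sketch) is with vm_fault_quick_hold_pages(), which
 * wires down a range of user memory for short-term I/O:
 *
 *	count = vm_fault_quick_hold_pages(map, uaddr, len, VM_PROT_READ,
 *	    ma, nitems(ma));
 *	if (count != -1) {
 *		... access the pages ...
 *		vm_page_unhold_pages(ma, count);
 *	}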
1220 */ 1221 void 1222 vm_page_unhold_pages(vm_page_t *ma, int count) 1223 { 1224 1225 for (; count != 0; count--) { 1226 vm_page_unwire(*ma, PQ_ACTIVE); 1227 ma++; 1228 } 1229 } 1230 1231 vm_page_t 1232 PHYS_TO_VM_PAGE(vm_paddr_t pa) 1233 { 1234 vm_page_t m; 1235 1236 #ifdef VM_PHYSSEG_SPARSE 1237 m = vm_phys_paddr_to_vm_page(pa); 1238 if (m == NULL) 1239 m = vm_phys_fictitious_to_vm_page(pa); 1240 return (m); 1241 #elif defined(VM_PHYSSEG_DENSE) 1242 long pi; 1243 1244 pi = atop(pa); 1245 if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 1246 m = &vm_page_array[pi - first_page]; 1247 return (m); 1248 } 1249 return (vm_phys_fictitious_to_vm_page(pa)); 1250 #else 1251 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." 1252 #endif 1253 } 1254 1255 /* 1256 * vm_page_getfake: 1257 * 1258 * Create a fictitious page with the specified physical address and 1259 * memory attribute. The memory attribute is the only the machine- 1260 * dependent aspect of a fictitious page that must be initialized. 1261 */ 1262 vm_page_t 1263 vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) 1264 { 1265 vm_page_t m; 1266 1267 m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); 1268 vm_page_initfake(m, paddr, memattr); 1269 return (m); 1270 } 1271 1272 void 1273 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) 1274 { 1275 1276 if ((m->flags & PG_FICTITIOUS) != 0) { 1277 /* 1278 * The page's memattr might have changed since the 1279 * previous initialization. Update the pmap to the 1280 * new memattr. 1281 */ 1282 goto memattr; 1283 } 1284 m->phys_addr = paddr; 1285 m->a.queue = PQ_NONE; 1286 /* Fictitious pages don't use "segind". */ 1287 m->flags = PG_FICTITIOUS; 1288 /* Fictitious pages don't use "order" or "pool". */ 1289 m->oflags = VPO_UNMANAGED; 1290 m->busy_lock = VPB_CURTHREAD_EXCLUSIVE; 1291 /* Fictitious pages are unevictable. */ 1292 m->ref_count = 1; 1293 pmap_page_init(m); 1294 memattr: 1295 pmap_page_set_memattr(m, memattr); 1296 } 1297 1298 /* 1299 * vm_page_putfake: 1300 * 1301 * Release a fictitious page. 1302 */ 1303 void 1304 vm_page_putfake(vm_page_t m) 1305 { 1306 1307 KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); 1308 KASSERT((m->flags & PG_FICTITIOUS) != 0, 1309 ("vm_page_putfake: bad page %p", m)); 1310 vm_page_assert_xbusied(m); 1311 vm_page_busy_free(m); 1312 uma_zfree(fakepg_zone, m); 1313 } 1314 1315 /* 1316 * vm_page_updatefake: 1317 * 1318 * Update the given fictitious page to the specified physical address and 1319 * memory attribute. 1320 */ 1321 void 1322 vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) 1323 { 1324 1325 KASSERT((m->flags & PG_FICTITIOUS) != 0, 1326 ("vm_page_updatefake: bad page %p", m)); 1327 m->phys_addr = paddr; 1328 pmap_page_set_memattr(m, memattr); 1329 } 1330 1331 /* 1332 * vm_page_free: 1333 * 1334 * Free a page. 1335 */ 1336 void 1337 vm_page_free(vm_page_t m) 1338 { 1339 1340 m->flags &= ~PG_ZERO; 1341 vm_page_free_toq(m); 1342 } 1343 1344 /* 1345 * vm_page_free_zero: 1346 * 1347 * Free a page to the zerod-pages queue 1348 */ 1349 void 1350 vm_page_free_zero(vm_page_t m) 1351 { 1352 1353 m->flags |= PG_ZERO; 1354 vm_page_free_toq(m); 1355 } 1356 1357 /* 1358 * Unbusy and handle the page queueing for a page from a getpages request that 1359 * was optionally read ahead or behind. 1360 */ 1361 void 1362 vm_page_readahead_finish(vm_page_t m) 1363 { 1364 1365 /* We shouldn't put invalid pages on queues. 
*/ 1366 KASSERT(!vm_page_none_valid(m), ("%s: %p is invalid", __func__, m)); 1367 1368 /* 1369 * Since the page is not the actually needed one, whether it should 1370 * be activated or deactivated is not obvious. Empirical results 1371 * have shown that deactivating the page is usually the best choice, 1372 * unless the page is wanted by another thread. 1373 */ 1374 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 1375 vm_page_activate(m); 1376 else 1377 vm_page_deactivate(m); 1378 vm_page_xunbusy_unchecked(m); 1379 } 1380 1381 /* 1382 * vm_page_sleep_if_busy: 1383 * 1384 * Sleep and release the object lock if the page is busied. 1385 * Returns TRUE if the thread slept. 1386 * 1387 * The given page must be unlocked and object containing it must 1388 * be locked. 1389 */ 1390 int 1391 vm_page_sleep_if_busy(vm_page_t m, const char *wmesg) 1392 { 1393 vm_object_t obj; 1394 1395 vm_page_lock_assert(m, MA_NOTOWNED); 1396 VM_OBJECT_ASSERT_WLOCKED(m->object); 1397 1398 /* 1399 * The page-specific object must be cached because page 1400 * identity can change during the sleep, causing the 1401 * re-lock of a different object. 1402 * It is assumed that a reference to the object is already 1403 * held by the callers. 1404 */ 1405 obj = m->object; 1406 if (_vm_page_busy_sleep(obj, m, m->pindex, wmesg, 0, true)) { 1407 VM_OBJECT_WLOCK(obj); 1408 return (TRUE); 1409 } 1410 return (FALSE); 1411 } 1412 1413 /* 1414 * vm_page_sleep_if_xbusy: 1415 * 1416 * Sleep and release the object lock if the page is xbusied. 1417 * Returns TRUE if the thread slept. 1418 * 1419 * The given page must be unlocked and object containing it must 1420 * be locked. 1421 */ 1422 int 1423 vm_page_sleep_if_xbusy(vm_page_t m, const char *wmesg) 1424 { 1425 vm_object_t obj; 1426 1427 vm_page_lock_assert(m, MA_NOTOWNED); 1428 VM_OBJECT_ASSERT_WLOCKED(m->object); 1429 1430 /* 1431 * The page-specific object must be cached because page 1432 * identity can change during the sleep, causing the 1433 * re-lock of a different object. 1434 * It is assumed that a reference to the object is already 1435 * held by the callers. 1436 */ 1437 obj = m->object; 1438 if (_vm_page_busy_sleep(obj, m, m->pindex, wmesg, VM_ALLOC_SBUSY, 1439 true)) { 1440 VM_OBJECT_WLOCK(obj); 1441 return (TRUE); 1442 } 1443 return (FALSE); 1444 } 1445 1446 /* 1447 * vm_page_dirty_KBI: [ internal use only ] 1448 * 1449 * Set all bits in the page's dirty field. 1450 * 1451 * The object containing the specified page must be locked if the 1452 * call is made from the machine-independent layer. 1453 * 1454 * See vm_page_clear_dirty_mask(). 1455 * 1456 * This function should only be called by vm_page_dirty(). 1457 */ 1458 void 1459 vm_page_dirty_KBI(vm_page_t m) 1460 { 1461 1462 /* Refer to this operation by its public name. */ 1463 KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!")); 1464 m->dirty = VM_PAGE_BITS_ALL; 1465 } 1466 1467 /* 1468 * vm_page_insert: [ internal use only ] 1469 * 1470 * Inserts the given mem entry into the object and object list. 1471 * 1472 * The object must be locked. 1473 */ 1474 int 1475 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) 1476 { 1477 vm_page_t mpred; 1478 1479 VM_OBJECT_ASSERT_WLOCKED(object); 1480 mpred = vm_radix_lookup_le(&object->rtree, pindex); 1481 return (vm_page_insert_after(m, object, pindex, mpred)); 1482 } 1483 1484 /* 1485 * vm_page_insert_after: 1486 * 1487 * Inserts the page "m" into the specified object at offset "pindex". 
1488 * 1489 * The page "mpred" must immediately precede the offset "pindex" within 1490 * the specified object. 1491 * 1492 * The object must be locked. 1493 */ 1494 static int 1495 vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, 1496 vm_page_t mpred) 1497 { 1498 vm_page_t msucc; 1499 1500 VM_OBJECT_ASSERT_WLOCKED(object); 1501 KASSERT(m->object == NULL, 1502 ("vm_page_insert_after: page already inserted")); 1503 if (mpred != NULL) { 1504 KASSERT(mpred->object == object, 1505 ("vm_page_insert_after: object doesn't contain mpred")); 1506 KASSERT(mpred->pindex < pindex, 1507 ("vm_page_insert_after: mpred doesn't precede pindex")); 1508 msucc = TAILQ_NEXT(mpred, listq); 1509 } else 1510 msucc = TAILQ_FIRST(&object->memq); 1511 if (msucc != NULL) 1512 KASSERT(msucc->pindex > pindex, 1513 ("vm_page_insert_after: msucc doesn't succeed pindex")); 1514 1515 /* 1516 * Record the object/offset pair in this page. 1517 */ 1518 m->object = object; 1519 m->pindex = pindex; 1520 m->ref_count |= VPRC_OBJREF; 1521 1522 /* 1523 * Now link into the object's ordered list of backed pages. 1524 */ 1525 if (vm_radix_insert(&object->rtree, m)) { 1526 m->object = NULL; 1527 m->pindex = 0; 1528 m->ref_count &= ~VPRC_OBJREF; 1529 return (1); 1530 } 1531 vm_page_insert_radixdone(m, object, mpred); 1532 return (0); 1533 } 1534 1535 /* 1536 * vm_page_insert_radixdone: 1537 * 1538 * Complete page "m" insertion into the specified object after the 1539 * radix trie hooking. 1540 * 1541 * The page "mpred" must precede the offset "m->pindex" within the 1542 * specified object. 1543 * 1544 * The object must be locked. 1545 */ 1546 static void 1547 vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) 1548 { 1549 1550 VM_OBJECT_ASSERT_WLOCKED(object); 1551 KASSERT(object != NULL && m->object == object, 1552 ("vm_page_insert_radixdone: page %p has inconsistent object", m)); 1553 KASSERT((m->ref_count & VPRC_OBJREF) != 0, 1554 ("vm_page_insert_radixdone: page %p is missing object ref", m)); 1555 if (mpred != NULL) { 1556 KASSERT(mpred->object == object, 1557 ("vm_page_insert_radixdone: object doesn't contain mpred")); 1558 KASSERT(mpred->pindex < m->pindex, 1559 ("vm_page_insert_radixdone: mpred doesn't precede pindex")); 1560 } 1561 1562 if (mpred != NULL) 1563 TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); 1564 else 1565 TAILQ_INSERT_HEAD(&object->memq, m, listq); 1566 1567 /* 1568 * Show that the object has one more resident page. 1569 */ 1570 object->resident_page_count++; 1571 1572 /* 1573 * Hold the vnode until the last page is released. 1574 */ 1575 if (object->resident_page_count == 1 && object->type == OBJT_VNODE) 1576 vhold(object->handle); 1577 1578 /* 1579 * Since we are inserting a new and possibly dirty page, 1580 * update the object's generation count. 1581 */ 1582 if (pmap_page_is_write_mapped(m)) 1583 vm_object_set_writeable_dirty(object); 1584 } 1585 1586 /* 1587 * Do the work to remove a page from its object. The caller is responsible for 1588 * updating the page's fields to reflect this removal. 1589 */ 1590 static void 1591 vm_page_object_remove(vm_page_t m) 1592 { 1593 vm_object_t object; 1594 vm_page_t mrem; 1595 1596 vm_page_assert_xbusied(m); 1597 object = m->object; 1598 VM_OBJECT_ASSERT_WLOCKED(object); 1599 KASSERT((m->ref_count & VPRC_OBJREF) != 0, 1600 ("page %p is missing its object ref", m)); 1601 1602 /* Deferred free of swap space. 
*/ 1603 if ((m->a.flags & PGA_SWAP_FREE) != 0) 1604 vm_pager_page_unswapped(m); 1605 1606 mrem = vm_radix_remove(&object->rtree, m->pindex); 1607 KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); 1608 1609 /* 1610 * Now remove from the object's list of backed pages. 1611 */ 1612 TAILQ_REMOVE(&object->memq, m, listq); 1613 1614 /* 1615 * And show that the object has one fewer resident page. 1616 */ 1617 object->resident_page_count--; 1618 1619 /* 1620 * The vnode may now be recycled. 1621 */ 1622 if (object->resident_page_count == 0 && object->type == OBJT_VNODE) 1623 vdrop(object->handle); 1624 } 1625 1626 /* 1627 * vm_page_remove: 1628 * 1629 * Removes the specified page from its containing object, but does not 1630 * invalidate any backing storage. Returns true if the object's reference 1631 * was the last reference to the page, and false otherwise. 1632 * 1633 * The object must be locked and the page must be exclusively busied. 1634 * The exclusive busy will be released on return. If this is not the 1635 * final ref and the caller does not hold a wire reference it may not 1636 * continue to access the page. 1637 */ 1638 bool 1639 vm_page_remove(vm_page_t m) 1640 { 1641 bool dropped; 1642 1643 dropped = vm_page_remove_xbusy(m); 1644 vm_page_xunbusy(m); 1645 1646 return (dropped); 1647 } 1648 1649 /* 1650 * vm_page_remove_xbusy 1651 * 1652 * Removes the page but leaves the xbusy held. Returns true if this 1653 * removed the final ref and false otherwise. 1654 */ 1655 bool 1656 vm_page_remove_xbusy(vm_page_t m) 1657 { 1658 1659 vm_page_object_remove(m); 1660 m->object = NULL; 1661 return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); 1662 } 1663 1664 /* 1665 * vm_page_lookup: 1666 * 1667 * Returns the page associated with the object/offset 1668 * pair specified; if none is found, NULL is returned. 1669 * 1670 * The object must be locked. 1671 */ 1672 vm_page_t 1673 vm_page_lookup(vm_object_t object, vm_pindex_t pindex) 1674 { 1675 1676 VM_OBJECT_ASSERT_LOCKED(object); 1677 return (vm_radix_lookup(&object->rtree, pindex)); 1678 } 1679 1680 /* 1681 * vm_page_find_least: 1682 * 1683 * Returns the page associated with the object with least pindex 1684 * greater than or equal to the parameter pindex, or NULL. 1685 * 1686 * The object must be locked. 1687 */ 1688 vm_page_t 1689 vm_page_find_least(vm_object_t object, vm_pindex_t pindex) 1690 { 1691 vm_page_t m; 1692 1693 VM_OBJECT_ASSERT_LOCKED(object); 1694 if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) 1695 m = vm_radix_lookup_ge(&object->rtree, pindex); 1696 return (m); 1697 } 1698 1699 /* 1700 * Returns the given page's successor (by pindex) within the object if it is 1701 * resident; if none is found, NULL is returned. 1702 * 1703 * The object must be locked. 1704 */ 1705 vm_page_t 1706 vm_page_next(vm_page_t m) 1707 { 1708 vm_page_t next; 1709 1710 VM_OBJECT_ASSERT_LOCKED(m->object); 1711 if ((next = TAILQ_NEXT(m, listq)) != NULL) { 1712 MPASS(next->object == m->object); 1713 if (next->pindex != m->pindex + 1) 1714 next = NULL; 1715 } 1716 return (next); 1717 } 1718 1719 /* 1720 * Returns the given page's predecessor (by pindex) within the object if it is 1721 * resident; if none is found, NULL is returned. 1722 * 1723 * The object must be locked. 
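 *
 * Together with vm_page_lookup(), vm_page_find_least() and vm_page_next(),
 * this supports the common idiom of walking the resident pages of a pindex
 * range in order (a sketch, relying on the sorted memq):
 *
 *	for (m = vm_page_find_least(object, start);
 *	    m != NULL && m->pindex < end;
 *	    m = TAILQ_NEXT(m, listq))
 *		... operate on m ...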
1724 */ 1725 vm_page_t 1726 vm_page_prev(vm_page_t m) 1727 { 1728 vm_page_t prev; 1729 1730 VM_OBJECT_ASSERT_LOCKED(m->object); 1731 if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) { 1732 MPASS(prev->object == m->object); 1733 if (prev->pindex != m->pindex - 1) 1734 prev = NULL; 1735 } 1736 return (prev); 1737 } 1738 1739 /* 1740 * Uses the page mnew as a replacement for an existing page at index 1741 * pindex which must be already present in the object. 1742 * 1743 * Both pages must be exclusively busied on enter. The old page is 1744 * unbusied on exit. 1745 * 1746 * A return value of true means mold is now free. If this is not the 1747 * final ref and the caller does not hold a wire reference it may not 1748 * continue to access the page. 1749 */ 1750 static bool 1751 vm_page_replace_hold(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, 1752 vm_page_t mold) 1753 { 1754 vm_page_t mret; 1755 bool dropped; 1756 1757 VM_OBJECT_ASSERT_WLOCKED(object); 1758 vm_page_assert_xbusied(mold); 1759 KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0, 1760 ("vm_page_replace: page %p already in object", mnew)); 1761 1762 /* 1763 * This function mostly follows vm_page_insert() and 1764 * vm_page_remove() without the radix, object count and vnode 1765 * dance. Double check such functions for more comments. 1766 */ 1767 1768 mnew->object = object; 1769 mnew->pindex = pindex; 1770 atomic_set_int(&mnew->ref_count, VPRC_OBJREF); 1771 mret = vm_radix_replace(&object->rtree, mnew); 1772 KASSERT(mret == mold, 1773 ("invalid page replacement, mold=%p, mret=%p", mold, mret)); 1774 KASSERT((mold->oflags & VPO_UNMANAGED) == 1775 (mnew->oflags & VPO_UNMANAGED), 1776 ("vm_page_replace: mismatched VPO_UNMANAGED")); 1777 1778 /* Keep the resident page list in sorted order. */ 1779 TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq); 1780 TAILQ_REMOVE(&object->memq, mold, listq); 1781 mold->object = NULL; 1782 1783 /* 1784 * The object's resident_page_count does not change because we have 1785 * swapped one page for another, but the generation count should 1786 * change if the page is dirty. 1787 */ 1788 if (pmap_page_is_write_mapped(mnew)) 1789 vm_object_set_writeable_dirty(object); 1790 dropped = vm_page_drop(mold, VPRC_OBJREF) == VPRC_OBJREF; 1791 vm_page_xunbusy(mold); 1792 1793 return (dropped); 1794 } 1795 1796 void 1797 vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, 1798 vm_page_t mold) 1799 { 1800 1801 vm_page_assert_xbusied(mnew); 1802 1803 if (vm_page_replace_hold(mnew, object, pindex, mold)) 1804 vm_page_free(mold); 1805 } 1806 1807 /* 1808 * vm_page_rename: 1809 * 1810 * Move the given memory entry from its 1811 * current object to the specified target object/offset. 1812 * 1813 * Note: swap associated with the page must be invalidated by the move. We 1814 * have to do this for several reasons: (1) we aren't freeing the 1815 * page, (2) we are dirtying the page, (3) the VM system is probably 1816 * moving the page from object A to B, and will then later move 1817 * the backing store from A to B and we can't have a conflict. 1818 * 1819 * Note: we *always* dirty the page. It is necessary both for the 1820 * fact that we moved it, and because we may be invalidating 1821 * swap. 1822 * 1823 * The objects must be locked. 
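 *
 * A sketch of the typical use during object collapse (illustrative; the
 * real code in vm_object.c also handles the failure and retry paths):
 *
 *	new_pindex = m->pindex - backing_offset_index;
 *	if (vm_page_rename(m, object, new_pindex) != 0) {
 *		... radix insertion failed: back off, wait for
 *		    memory and restart the scan ...
 *	}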
1824 */ 1825 int 1826 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) 1827 { 1828 vm_page_t mpred; 1829 vm_pindex_t opidx; 1830 1831 VM_OBJECT_ASSERT_WLOCKED(new_object); 1832 1833 KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); 1834 mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); 1835 KASSERT(mpred == NULL || mpred->pindex != new_pindex, 1836 ("vm_page_rename: pindex already renamed")); 1837 1838 /* 1839 * Create a custom version of vm_page_insert() which does not depend 1840 * by m_prev and can cheat on the implementation aspects of the 1841 * function. 1842 */ 1843 opidx = m->pindex; 1844 m->pindex = new_pindex; 1845 if (vm_radix_insert(&new_object->rtree, m)) { 1846 m->pindex = opidx; 1847 return (1); 1848 } 1849 1850 /* 1851 * The operation cannot fail anymore. The removal must happen before 1852 * the listq iterator is tainted. 1853 */ 1854 m->pindex = opidx; 1855 vm_page_object_remove(m); 1856 1857 /* Return back to the new pindex to complete vm_page_insert(). */ 1858 m->pindex = new_pindex; 1859 m->object = new_object; 1860 1861 vm_page_insert_radixdone(m, new_object, mpred); 1862 vm_page_dirty(m); 1863 return (0); 1864 } 1865 1866 /* 1867 * vm_page_alloc: 1868 * 1869 * Allocate and return a page that is associated with the specified 1870 * object and offset pair. By default, this page is exclusive busied. 1871 * 1872 * The caller must always specify an allocation class. 1873 * 1874 * allocation classes: 1875 * VM_ALLOC_NORMAL normal process request 1876 * VM_ALLOC_SYSTEM system *really* needs a page 1877 * VM_ALLOC_INTERRUPT interrupt time request 1878 * 1879 * optional allocation flags: 1880 * VM_ALLOC_COUNT(number) the number of additional pages that the caller 1881 * intends to allocate 1882 * VM_ALLOC_NOBUSY do not exclusive busy the page 1883 * VM_ALLOC_NODUMP do not include the page in a kernel core dump 1884 * VM_ALLOC_NOOBJ page is not associated with an object and 1885 * should not be exclusive busy 1886 * VM_ALLOC_SBUSY shared busy the allocated page 1887 * VM_ALLOC_WIRED wire the allocated page 1888 * VM_ALLOC_ZERO prefer a zeroed page 1889 */ 1890 vm_page_t 1891 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) 1892 { 1893 1894 return (vm_page_alloc_after(object, pindex, req, object != NULL ? 1895 vm_radix_lookup_le(&object->rtree, pindex) : NULL)); 1896 } 1897 1898 vm_page_t 1899 vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain, 1900 int req) 1901 { 1902 1903 return (vm_page_alloc_domain_after(object, pindex, domain, req, 1904 object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) : 1905 NULL)); 1906 } 1907 1908 /* 1909 * Allocate a page in the specified object with the given page index. To 1910 * optimize insertion of the page into the object, the caller must also specifiy 1911 * the resident page in the object with largest index smaller than the given 1912 * page index, or NULL if no such page exists. 
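 *
 * For example (a sketch), a caller populating consecutive indices can carry
 * the predecessor forward instead of repeating the radix lookup each time:
 *
 *	mpred = vm_radix_lookup_le(&object->rtree, pindex);
 *	for (i = 0; i < npages; i++) {
 *		m = vm_page_alloc_after(object, pindex + i,
 *		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED, mpred);
 *		if (m == NULL)
 *			break;
 *		mpred = m;
 *	}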
1913 */ 1914 vm_page_t 1915 vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, 1916 int req, vm_page_t mpred) 1917 { 1918 struct vm_domainset_iter di; 1919 vm_page_t m; 1920 int domain; 1921 1922 vm_domainset_iter_page_init(&di, object, pindex, &domain, &req); 1923 do { 1924 m = vm_page_alloc_domain_after(object, pindex, domain, req, 1925 mpred); 1926 if (m != NULL) 1927 break; 1928 } while (vm_domainset_iter_page(&di, object, &domain) == 0); 1929 1930 return (m); 1931 } 1932 1933 /* 1934 * Returns true if the number of free pages exceeds the minimum 1935 * for the request class and false otherwise. 1936 */ 1937 static int 1938 _vm_domain_allocate(struct vm_domain *vmd, int req_class, int npages) 1939 { 1940 u_int limit, old, new; 1941 1942 if (req_class == VM_ALLOC_INTERRUPT) 1943 limit = 0; 1944 else if (req_class == VM_ALLOC_SYSTEM) 1945 limit = vmd->vmd_interrupt_free_min; 1946 else 1947 limit = vmd->vmd_free_reserved; 1948 1949 /* 1950 * Attempt to reserve the pages. Fail if we're below the limit. 1951 */ 1952 limit += npages; 1953 old = vmd->vmd_free_count; 1954 do { 1955 if (old < limit) 1956 return (0); 1957 new = old - npages; 1958 } while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0); 1959 1960 /* Wake the page daemon if we've crossed the threshold. */ 1961 if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old)) 1962 pagedaemon_wakeup(vmd->vmd_domain); 1963 1964 /* Only update bitsets on transitions. */ 1965 if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) || 1966 (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe)) 1967 vm_domain_set(vmd); 1968 1969 return (1); 1970 } 1971 1972 int 1973 vm_domain_allocate(struct vm_domain *vmd, int req, int npages) 1974 { 1975 int req_class; 1976 1977 /* 1978 * The page daemon is allowed to dig deeper into the free page list. 1979 */ 1980 req_class = req & VM_ALLOC_CLASS_MASK; 1981 if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) 1982 req_class = VM_ALLOC_SYSTEM; 1983 return (_vm_domain_allocate(vmd, req_class, npages)); 1984 } 1985 1986 vm_page_t 1987 vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, 1988 int req, vm_page_t mpred) 1989 { 1990 struct vm_domain *vmd; 1991 vm_page_t m; 1992 int flags, pool; 1993 1994 KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && 1995 (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && 1996 ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != 1997 (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), 1998 ("inconsistent object(%p)/req(%x)", object, req)); 1999 KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, 2000 ("Can't sleep and retry object insertion.")); 2001 KASSERT(mpred == NULL || mpred->pindex < pindex, 2002 ("mpred %p doesn't precede pindex 0x%jx", mpred, 2003 (uintmax_t)pindex)); 2004 if (object != NULL) 2005 VM_OBJECT_ASSERT_WLOCKED(object); 2006 2007 flags = 0; 2008 m = NULL; 2009 pool = object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT; 2010 again: 2011 #if VM_NRESERVLEVEL > 0 2012 /* 2013 * Can we allocate the page from a reservation? 
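 *
 * (A reservation is a physically contiguous, superpage-aligned run of pages
 * tracked by vm_reserv.c so that a fully populated run can later be promoted
 * to a superpage mapping; on amd64, for example, a level 0 reservation is
 * 2 MB, i.e. 512 base pages.)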
2014 */ 2015 if (vm_object_reserv(object) && 2016 (m = vm_reserv_alloc_page(object, pindex, domain, req, mpred)) != 2017 NULL) { 2018 domain = vm_phys_domain(m); 2019 vmd = VM_DOMAIN(domain); 2020 goto found; 2021 } 2022 #endif 2023 vmd = VM_DOMAIN(domain); 2024 if (vmd->vmd_pgcache[pool].zone != NULL) { 2025 m = uma_zalloc(vmd->vmd_pgcache[pool].zone, M_NOWAIT); 2026 if (m != NULL) { 2027 flags |= PG_PCPU_CACHE; 2028 goto found; 2029 } 2030 } 2031 if (vm_domain_allocate(vmd, req, 1)) { 2032 /* 2033 * If not, allocate it from the free page queues. 2034 */ 2035 vm_domain_free_lock(vmd); 2036 m = vm_phys_alloc_pages(domain, pool, 0); 2037 vm_domain_free_unlock(vmd); 2038 if (m == NULL) { 2039 vm_domain_freecnt_inc(vmd, 1); 2040 #if VM_NRESERVLEVEL > 0 2041 if (vm_reserv_reclaim_inactive(domain)) 2042 goto again; 2043 #endif 2044 } 2045 } 2046 if (m == NULL) { 2047 /* 2048 * Not allocatable, give up. 2049 */ 2050 if (vm_domain_alloc_fail(vmd, object, req)) 2051 goto again; 2052 return (NULL); 2053 } 2054 2055 /* 2056 * At this point we had better have found a good page. 2057 */ 2058 found: 2059 vm_page_dequeue(m); 2060 vm_page_alloc_check(m); 2061 2062 /* 2063 * Initialize the page. Only the PG_ZERO flag is inherited. 2064 */ 2065 if ((req & VM_ALLOC_ZERO) != 0) 2066 flags |= (m->flags & PG_ZERO); 2067 if ((req & VM_ALLOC_NODUMP) != 0) 2068 flags |= PG_NODUMP; 2069 m->flags = flags; 2070 m->a.flags = 0; 2071 m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? 2072 VPO_UNMANAGED : 0; 2073 if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) 2074 m->busy_lock = VPB_CURTHREAD_EXCLUSIVE; 2075 else if ((req & VM_ALLOC_SBUSY) != 0) 2076 m->busy_lock = VPB_SHARERS_WORD(1); 2077 else 2078 m->busy_lock = VPB_UNBUSIED; 2079 if (req & VM_ALLOC_WIRED) { 2080 vm_wire_add(1); 2081 m->ref_count = 1; 2082 } 2083 m->a.act_count = 0; 2084 2085 if (object != NULL) { 2086 if (vm_page_insert_after(m, object, pindex, mpred)) { 2087 if (req & VM_ALLOC_WIRED) { 2088 vm_wire_sub(1); 2089 m->ref_count = 0; 2090 } 2091 KASSERT(m->object == NULL, ("page %p has object", m)); 2092 m->oflags = VPO_UNMANAGED; 2093 m->busy_lock = VPB_UNBUSIED; 2094 /* Don't change PG_ZERO. */ 2095 vm_page_free_toq(m); 2096 if (req & VM_ALLOC_WAITFAIL) { 2097 VM_OBJECT_WUNLOCK(object); 2098 vm_radix_wait(); 2099 VM_OBJECT_WLOCK(object); 2100 } 2101 return (NULL); 2102 } 2103 2104 /* Ignore device objects; the pager sets "memattr" for them. */ 2105 if (object->memattr != VM_MEMATTR_DEFAULT && 2106 (object->flags & OBJ_FICTITIOUS) == 0) 2107 pmap_page_set_memattr(m, object->memattr); 2108 } else 2109 m->pindex = pindex; 2110 2111 return (m); 2112 } 2113 2114 /* 2115 * vm_page_alloc_contig: 2116 * 2117 * Allocate a contiguous set of physical pages of the given size "npages" 2118 * from the free lists. All of the physical pages must be at or above 2119 * the given physical address "low" and below the given physical address 2120 * "high". The given value "alignment" determines the alignment of the 2121 * first physical page in the set. If the given value "boundary" is 2122 * non-zero, then the set of physical pages cannot cross any physical 2123 * address boundary that is a multiple of that value. Both "alignment" 2124 * and "boundary" must be a power of two. 2125 * 2126 * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, 2127 * then the memory attribute setting for the physical pages is configured 2128 * to the object's memory attribute setting. 
Otherwise, the memory 2129 * attribute setting for the physical pages is configured to "memattr", 2130 * overriding the object's memory attribute setting. However, if the 2131 * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the 2132 * memory attribute setting for the physical pages cannot be configured 2133 * to VM_MEMATTR_DEFAULT. 2134 * 2135 * The specified object may not contain fictitious pages. 2136 * 2137 * The caller must always specify an allocation class. 2138 * 2139 * allocation classes: 2140 * VM_ALLOC_NORMAL normal process request 2141 * VM_ALLOC_SYSTEM system *really* needs a page 2142 * VM_ALLOC_INTERRUPT interrupt time request 2143 * 2144 * optional allocation flags: 2145 * VM_ALLOC_NOBUSY do not exclusive busy the page 2146 * VM_ALLOC_NODUMP do not include the page in a kernel core dump 2147 * VM_ALLOC_NOOBJ page is not associated with an object and 2148 * should not be exclusive busy 2149 * VM_ALLOC_SBUSY shared busy the allocated page 2150 * VM_ALLOC_WIRED wire the allocated page 2151 * VM_ALLOC_ZERO prefer a zeroed page 2152 */ 2153 vm_page_t 2154 vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, 2155 u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, 2156 vm_paddr_t boundary, vm_memattr_t memattr) 2157 { 2158 struct vm_domainset_iter di; 2159 vm_page_t m; 2160 int domain; 2161 2162 vm_domainset_iter_page_init(&di, object, pindex, &domain, &req); 2163 do { 2164 m = vm_page_alloc_contig_domain(object, pindex, domain, req, 2165 npages, low, high, alignment, boundary, memattr); 2166 if (m != NULL) 2167 break; 2168 } while (vm_domainset_iter_page(&di, object, &domain) == 0); 2169 2170 return (m); 2171 } 2172 2173 vm_page_t 2174 vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, 2175 int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, 2176 vm_paddr_t boundary, vm_memattr_t memattr) 2177 { 2178 struct vm_domain *vmd; 2179 vm_page_t m, m_ret, mpred; 2180 u_int busy_lock, flags, oflags; 2181 2182 mpred = NULL; /* XXX: pacify gcc */ 2183 KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && 2184 (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && 2185 ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != 2186 (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), 2187 ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object, 2188 req)); 2189 KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, 2190 ("Can't sleep and retry object insertion.")); 2191 if (object != NULL) { 2192 VM_OBJECT_ASSERT_WLOCKED(object); 2193 KASSERT((object->flags & OBJ_FICTITIOUS) == 0, 2194 ("vm_page_alloc_contig: object %p has fictitious pages", 2195 object)); 2196 } 2197 KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); 2198 2199 if (object != NULL) { 2200 mpred = vm_radix_lookup_le(&object->rtree, pindex); 2201 KASSERT(mpred == NULL || mpred->pindex != pindex, 2202 ("vm_page_alloc_contig: pindex already allocated")); 2203 } 2204 2205 /* 2206 * Can we allocate the pages without the number of free pages falling 2207 * below the lower bound for the allocation class? 2208 */ 2209 m_ret = NULL; 2210 again: 2211 #if VM_NRESERVLEVEL > 0 2212 /* 2213 * Can we allocate the pages from a reservation? 
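 * As in vm_page_alloc_domain_after(), the reservation layer is tried
 * first; otherwise the run is taken directly from the free lists, and
 * a failure may be retried after reclaiming a contiguous range from
 * partially populated reservations.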
2214 */ 2215 if (vm_object_reserv(object) && 2216 (m_ret = vm_reserv_alloc_contig(object, pindex, domain, req, 2217 mpred, npages, low, high, alignment, boundary)) != NULL) { 2218 domain = vm_phys_domain(m_ret); 2219 vmd = VM_DOMAIN(domain); 2220 goto found; 2221 } 2222 #endif 2223 vmd = VM_DOMAIN(domain); 2224 if (vm_domain_allocate(vmd, req, npages)) { 2225 /* 2226 * allocate them from the free page queues. 2227 */ 2228 vm_domain_free_lock(vmd); 2229 m_ret = vm_phys_alloc_contig(domain, npages, low, high, 2230 alignment, boundary); 2231 vm_domain_free_unlock(vmd); 2232 if (m_ret == NULL) { 2233 vm_domain_freecnt_inc(vmd, npages); 2234 #if VM_NRESERVLEVEL > 0 2235 if (vm_reserv_reclaim_contig(domain, npages, low, 2236 high, alignment, boundary)) 2237 goto again; 2238 #endif 2239 } 2240 } 2241 if (m_ret == NULL) { 2242 if (vm_domain_alloc_fail(vmd, object, req)) 2243 goto again; 2244 return (NULL); 2245 } 2246 #if VM_NRESERVLEVEL > 0 2247 found: 2248 #endif 2249 for (m = m_ret; m < &m_ret[npages]; m++) { 2250 vm_page_dequeue(m); 2251 vm_page_alloc_check(m); 2252 } 2253 2254 /* 2255 * Initialize the pages. Only the PG_ZERO flag is inherited. 2256 */ 2257 flags = 0; 2258 if ((req & VM_ALLOC_ZERO) != 0) 2259 flags = PG_ZERO; 2260 if ((req & VM_ALLOC_NODUMP) != 0) 2261 flags |= PG_NODUMP; 2262 oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? 2263 VPO_UNMANAGED : 0; 2264 if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) 2265 busy_lock = VPB_CURTHREAD_EXCLUSIVE; 2266 else if ((req & VM_ALLOC_SBUSY) != 0) 2267 busy_lock = VPB_SHARERS_WORD(1); 2268 else 2269 busy_lock = VPB_UNBUSIED; 2270 if ((req & VM_ALLOC_WIRED) != 0) 2271 vm_wire_add(npages); 2272 if (object != NULL) { 2273 if (object->memattr != VM_MEMATTR_DEFAULT && 2274 memattr == VM_MEMATTR_DEFAULT) 2275 memattr = object->memattr; 2276 } 2277 for (m = m_ret; m < &m_ret[npages]; m++) { 2278 m->a.flags = 0; 2279 m->flags = (m->flags | PG_NODUMP) & flags; 2280 m->busy_lock = busy_lock; 2281 if ((req & VM_ALLOC_WIRED) != 0) 2282 m->ref_count = 1; 2283 m->a.act_count = 0; 2284 m->oflags = oflags; 2285 if (object != NULL) { 2286 if (vm_page_insert_after(m, object, pindex, mpred)) { 2287 if ((req & VM_ALLOC_WIRED) != 0) 2288 vm_wire_sub(npages); 2289 KASSERT(m->object == NULL, 2290 ("page %p has object", m)); 2291 mpred = m; 2292 for (m = m_ret; m < &m_ret[npages]; m++) { 2293 if (m <= mpred && 2294 (req & VM_ALLOC_WIRED) != 0) 2295 m->ref_count = 0; 2296 m->oflags = VPO_UNMANAGED; 2297 m->busy_lock = VPB_UNBUSIED; 2298 /* Don't change PG_ZERO. */ 2299 vm_page_free_toq(m); 2300 } 2301 if (req & VM_ALLOC_WAITFAIL) { 2302 VM_OBJECT_WUNLOCK(object); 2303 vm_radix_wait(); 2304 VM_OBJECT_WLOCK(object); 2305 } 2306 return (NULL); 2307 } 2308 mpred = m; 2309 } else 2310 m->pindex = pindex; 2311 if (memattr != VM_MEMATTR_DEFAULT) 2312 pmap_page_set_memattr(m, memattr); 2313 pindex++; 2314 } 2315 return (m_ret); 2316 } 2317 2318 /* 2319 * Check a page that has been freshly dequeued from a freelist. 
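 * The assertions below spell out the invariants that a free page must
 * satisfy: it belongs to no object, carries no queue state or
 * references, is not busy, is clean, has the default memory attribute,
 * and has no valid bits set.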
2320 */ 2321 static void 2322 vm_page_alloc_check(vm_page_t m) 2323 { 2324 2325 KASSERT(m->object == NULL, ("page %p has object", m)); 2326 KASSERT(m->a.queue == PQ_NONE && 2327 (m->a.flags & PGA_QUEUE_STATE_MASK) == 0, 2328 ("page %p has unexpected queue %d, flags %#x", 2329 m, m->a.queue, (m->a.flags & PGA_QUEUE_STATE_MASK))); 2330 KASSERT(m->ref_count == 0, ("page %p has references", m)); 2331 KASSERT(vm_page_busy_freed(m), ("page %p is not freed", m)); 2332 KASSERT(m->dirty == 0, ("page %p is dirty", m)); 2333 KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, 2334 ("page %p has unexpected memattr %d", 2335 m, pmap_page_get_memattr(m))); 2336 KASSERT(m->valid == 0, ("free page %p is valid", m)); 2337 } 2338 2339 /* 2340 * vm_page_alloc_freelist: 2341 * 2342 * Allocate a physical page from the specified free page list. 2343 * 2344 * The caller must always specify an allocation class. 2345 * 2346 * allocation classes: 2347 * VM_ALLOC_NORMAL normal process request 2348 * VM_ALLOC_SYSTEM system *really* needs a page 2349 * VM_ALLOC_INTERRUPT interrupt time request 2350 * 2351 * optional allocation flags: 2352 * VM_ALLOC_COUNT(number) the number of additional pages that the caller 2353 * intends to allocate 2354 * VM_ALLOC_WIRED wire the allocated page 2355 * VM_ALLOC_ZERO prefer a zeroed page 2356 */ 2357 vm_page_t 2358 vm_page_alloc_freelist(int freelist, int req) 2359 { 2360 struct vm_domainset_iter di; 2361 vm_page_t m; 2362 int domain; 2363 2364 vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req); 2365 do { 2366 m = vm_page_alloc_freelist_domain(domain, freelist, req); 2367 if (m != NULL) 2368 break; 2369 } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); 2370 2371 return (m); 2372 } 2373 2374 vm_page_t 2375 vm_page_alloc_freelist_domain(int domain, int freelist, int req) 2376 { 2377 struct vm_domain *vmd; 2378 vm_page_t m; 2379 u_int flags; 2380 2381 m = NULL; 2382 vmd = VM_DOMAIN(domain); 2383 again: 2384 if (vm_domain_allocate(vmd, req, 1)) { 2385 vm_domain_free_lock(vmd); 2386 m = vm_phys_alloc_freelist_pages(domain, freelist, 2387 VM_FREEPOOL_DIRECT, 0); 2388 vm_domain_free_unlock(vmd); 2389 if (m == NULL) 2390 vm_domain_freecnt_inc(vmd, 1); 2391 } 2392 if (m == NULL) { 2393 if (vm_domain_alloc_fail(vmd, NULL, req)) 2394 goto again; 2395 return (NULL); 2396 } 2397 vm_page_dequeue(m); 2398 vm_page_alloc_check(m); 2399 2400 /* 2401 * Initialize the page. Only the PG_ZERO flag is inherited. 2402 */ 2403 m->a.flags = 0; 2404 flags = 0; 2405 if ((req & VM_ALLOC_ZERO) != 0) 2406 flags = PG_ZERO; 2407 m->flags &= flags; 2408 if ((req & VM_ALLOC_WIRED) != 0) { 2409 vm_wire_add(1); 2410 m->ref_count = 1; 2411 } 2412 /* Unmanaged pages don't use "act_count". */ 2413 m->oflags = VPO_UNMANAGED; 2414 return (m); 2415 } 2416 2417 static int 2418 vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags) 2419 { 2420 struct vm_domain *vmd; 2421 struct vm_pgcache *pgcache; 2422 int i; 2423 2424 pgcache = arg; 2425 vmd = VM_DOMAIN(pgcache->domain); 2426 2427 /* 2428 * The page daemon should avoid creating extra memory pressure since its 2429 * main purpose is to replenish the store of free pages. 
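 * Similarly, refuse to fill the per-CPU cache while the domain is
 * severely short of free pages.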
2430 */ 2431 if (vmd->vmd_severeset || curproc == pageproc || 2432 !_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt)) 2433 return (0); 2434 domain = vmd->vmd_domain; 2435 vm_domain_free_lock(vmd); 2436 i = vm_phys_alloc_npages(domain, pgcache->pool, cnt, 2437 (vm_page_t *)store); 2438 vm_domain_free_unlock(vmd); 2439 if (cnt != i) 2440 vm_domain_freecnt_inc(vmd, cnt - i); 2441 2442 return (i); 2443 } 2444 2445 static void 2446 vm_page_zone_release(void *arg, void **store, int cnt) 2447 { 2448 struct vm_domain *vmd; 2449 struct vm_pgcache *pgcache; 2450 vm_page_t m; 2451 int i; 2452 2453 pgcache = arg; 2454 vmd = VM_DOMAIN(pgcache->domain); 2455 vm_domain_free_lock(vmd); 2456 for (i = 0; i < cnt; i++) { 2457 m = (vm_page_t)store[i]; 2458 vm_phys_free_pages(m, 0); 2459 } 2460 vm_domain_free_unlock(vmd); 2461 vm_domain_freecnt_inc(vmd, cnt); 2462 } 2463 2464 #define VPSC_ANY 0 /* No restrictions. */ 2465 #define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ 2466 #define VPSC_NOSUPER 2 /* Skip superpages. */ 2467 2468 /* 2469 * vm_page_scan_contig: 2470 * 2471 * Scan vm_page_array[] between the specified entries "m_start" and 2472 * "m_end" for a run of contiguous physical pages that satisfy the 2473 * specified conditions, and return the lowest page in the run. The 2474 * specified "alignment" determines the alignment of the lowest physical 2475 * page in the run. If the specified "boundary" is non-zero, then the 2476 * run of physical pages cannot span a physical address that is a 2477 * multiple of "boundary". 2478 * 2479 * "m_end" is never dereferenced, so it need not point to a vm_page 2480 * structure within vm_page_array[]. 2481 * 2482 * "npages" must be greater than zero. "m_start" and "m_end" must not 2483 * span a hole (or discontiguity) in the physical address space. Both 2484 * "alignment" and "boundary" must be a power of two. 2485 */ 2486 vm_page_t 2487 vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, 2488 u_long alignment, vm_paddr_t boundary, int options) 2489 { 2490 vm_object_t object; 2491 vm_paddr_t pa; 2492 vm_page_t m, m_run; 2493 #if VM_NRESERVLEVEL > 0 2494 int level; 2495 #endif 2496 int m_inc, order, run_ext, run_len; 2497 2498 KASSERT(npages > 0, ("npages is 0")); 2499 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 2500 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 2501 m_run = NULL; 2502 run_len = 0; 2503 for (m = m_start; m < m_end && run_len < npages; m += m_inc) { 2504 KASSERT((m->flags & PG_MARKER) == 0, 2505 ("page %p is PG_MARKER", m)); 2506 KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, 2507 ("fictitious page %p has invalid ref count", m)); 2508 2509 /* 2510 * If the current page would be the start of a run, check its 2511 * physical address against the end, alignment, and boundary 2512 * conditions. If it doesn't satisfy these conditions, either 2513 * terminate the scan or advance to the next page that 2514 * satisfies the failed condition. 
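 * For example, if "alignment" is 2 MB and the candidate page's
 * physical address is not 2 MB-aligned, the scan advances by
 * atop(roundup2(pa, alignment) - pa) pages, i.e., directly to the
 * next 2 MB boundary, instead of testing every intervening page.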
2515 */ 2516 if (run_len == 0) { 2517 KASSERT(m_run == NULL, ("m_run != NULL")); 2518 if (m + npages > m_end) 2519 break; 2520 pa = VM_PAGE_TO_PHYS(m); 2521 if ((pa & (alignment - 1)) != 0) { 2522 m_inc = atop(roundup2(pa, alignment) - pa); 2523 continue; 2524 } 2525 if (rounddown2(pa ^ (pa + ptoa(npages) - 1), 2526 boundary) != 0) { 2527 m_inc = atop(roundup2(pa, boundary) - pa); 2528 continue; 2529 } 2530 } else 2531 KASSERT(m_run != NULL, ("m_run == NULL")); 2532 2533 retry: 2534 m_inc = 1; 2535 if (vm_page_wired(m)) 2536 run_ext = 0; 2537 #if VM_NRESERVLEVEL > 0 2538 else if ((level = vm_reserv_level(m)) >= 0 && 2539 (options & VPSC_NORESERV) != 0) { 2540 run_ext = 0; 2541 /* Advance to the end of the reservation. */ 2542 pa = VM_PAGE_TO_PHYS(m); 2543 m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - 2544 pa); 2545 } 2546 #endif 2547 else if ((object = atomic_load_ptr(&m->object)) != NULL) { 2548 /* 2549 * The page is considered eligible for relocation if 2550 * and only if it could be laundered or reclaimed by 2551 * the page daemon. 2552 */ 2553 VM_OBJECT_RLOCK(object); 2554 if (object != m->object) { 2555 VM_OBJECT_RUNLOCK(object); 2556 goto retry; 2557 } 2558 /* Don't care: PG_NODUMP, PG_ZERO. */ 2559 if (object->type != OBJT_DEFAULT && 2560 object->type != OBJT_SWAP && 2561 object->type != OBJT_VNODE) { 2562 run_ext = 0; 2563 #if VM_NRESERVLEVEL > 0 2564 } else if ((options & VPSC_NOSUPER) != 0 && 2565 (level = vm_reserv_level_iffullpop(m)) >= 0) { 2566 run_ext = 0; 2567 /* Advance to the end of the superpage. */ 2568 pa = VM_PAGE_TO_PHYS(m); 2569 m_inc = atop(roundup2(pa + 1, 2570 vm_reserv_size(level)) - pa); 2571 #endif 2572 } else if (object->memattr == VM_MEMATTR_DEFAULT && 2573 vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) { 2574 /* 2575 * The page is allocated but eligible for 2576 * relocation. Extend the current run by one 2577 * page. 2578 */ 2579 KASSERT(pmap_page_get_memattr(m) == 2580 VM_MEMATTR_DEFAULT, 2581 ("page %p has an unexpected memattr", m)); 2582 KASSERT((m->oflags & (VPO_SWAPINPROG | 2583 VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, 2584 ("page %p has unexpected oflags", m)); 2585 /* Don't care: PGA_NOSYNC. */ 2586 run_ext = 1; 2587 } else 2588 run_ext = 0; 2589 VM_OBJECT_RUNLOCK(object); 2590 #if VM_NRESERVLEVEL > 0 2591 } else if (level >= 0) { 2592 /* 2593 * The page is reserved but not yet allocated. In 2594 * other words, it is still free. Extend the current 2595 * run by one page. 2596 */ 2597 run_ext = 1; 2598 #endif 2599 } else if ((order = m->order) < VM_NFREEORDER) { 2600 /* 2601 * The page is enqueued in the physical memory 2602 * allocator's free page queues. Moreover, it is the 2603 * first page in a power-of-two-sized run of 2604 * contiguous free pages. Add these pages to the end 2605 * of the current run, and jump ahead. 2606 */ 2607 run_ext = 1 << order; 2608 m_inc = 1 << order; 2609 } else { 2610 /* 2611 * Skip the page for one of the following reasons: (1) 2612 * It is enqueued in the physical memory allocator's 2613 * free page queues. However, it is not the first 2614 * page in a run of contiguous free pages. (This case 2615 * rarely occurs because the scan is performed in 2616 * ascending order.) (2) It is not reserved, and it is 2617 * transitioning from free to allocated. (Conversely, 2618 * the transition from allocated to free for managed 2619 * pages is blocked by the page lock.) (3) It is 2620 * allocated but not contained by an object and not 2621 * wired, e.g., allocated by Xen's balloon driver. 
2622 */ 2623 run_ext = 0; 2624 } 2625 2626 /* 2627 * Extend or reset the current run of pages. 2628 */ 2629 if (run_ext > 0) { 2630 if (run_len == 0) 2631 m_run = m; 2632 run_len += run_ext; 2633 } else { 2634 if (run_len > 0) { 2635 m_run = NULL; 2636 run_len = 0; 2637 } 2638 } 2639 } 2640 if (run_len >= npages) 2641 return (m_run); 2642 return (NULL); 2643 } 2644 2645 /* 2646 * vm_page_reclaim_run: 2647 * 2648 * Try to relocate each of the allocated virtual pages within the 2649 * specified run of physical pages to a new physical address. Free the 2650 * physical pages underlying the relocated virtual pages. A virtual page 2651 * is relocatable if and only if it could be laundered or reclaimed by 2652 * the page daemon. Whenever possible, a virtual page is relocated to a 2653 * physical address above "high". 2654 * 2655 * Returns 0 if every physical page within the run was already free or 2656 * just freed by a successful relocation. Otherwise, returns a non-zero 2657 * value indicating why the last attempt to relocate a virtual page was 2658 * unsuccessful. 2659 * 2660 * "req_class" must be an allocation class. 2661 */ 2662 static int 2663 vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, 2664 vm_paddr_t high) 2665 { 2666 struct vm_domain *vmd; 2667 struct spglist free; 2668 vm_object_t object; 2669 vm_paddr_t pa; 2670 vm_page_t m, m_end, m_new; 2671 int error, order, req; 2672 2673 KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, 2674 ("req_class is not an allocation class")); 2675 SLIST_INIT(&free); 2676 error = 0; 2677 m = m_run; 2678 m_end = m_run + npages; 2679 for (; error == 0 && m < m_end; m++) { 2680 KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, 2681 ("page %p is PG_FICTITIOUS or PG_MARKER", m)); 2682 2683 /* 2684 * Racily check for wirings. Races are handled once the object 2685 * lock is held and the page is unmapped. 2686 */ 2687 if (vm_page_wired(m)) 2688 error = EBUSY; 2689 else if ((object = atomic_load_ptr(&m->object)) != NULL) { 2690 /* 2691 * The page is relocated if and only if it could be 2692 * laundered or reclaimed by the page daemon. 2693 */ 2694 VM_OBJECT_WLOCK(object); 2695 /* Don't care: PG_NODUMP, PG_ZERO. */ 2696 if (m->object != object || 2697 (object->type != OBJT_DEFAULT && 2698 object->type != OBJT_SWAP && 2699 object->type != OBJT_VNODE)) 2700 error = EINVAL; 2701 else if (object->memattr != VM_MEMATTR_DEFAULT) 2702 error = EINVAL; 2703 else if (vm_page_queue(m) != PQ_NONE && 2704 vm_page_tryxbusy(m) != 0) { 2705 if (vm_page_wired(m)) { 2706 vm_page_xunbusy(m); 2707 error = EBUSY; 2708 goto unlock; 2709 } 2710 KASSERT(pmap_page_get_memattr(m) == 2711 VM_MEMATTR_DEFAULT, 2712 ("page %p has an unexpected memattr", m)); 2713 KASSERT(m->oflags == 0, 2714 ("page %p has unexpected oflags", m)); 2715 /* Don't care: PGA_NOSYNC. */ 2716 if (!vm_page_none_valid(m)) { 2717 /* 2718 * First, try to allocate a new page 2719 * that is above "high". Failing 2720 * that, try to allocate a new page 2721 * that is below "m_run". Allocate 2722 * the new page between the end of 2723 * "m_run" and "high" only as a last 2724 * resort. 
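 * Preferring an address above "high" moves the data out of
 * the range that the caller is trying to vacate; the fallbacks
 * at least keep the replacement page outside of the run that
 * is currently being reclaimed.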
2725 */ 2726 req = req_class | VM_ALLOC_NOOBJ; 2727 if ((m->flags & PG_NODUMP) != 0) 2728 req |= VM_ALLOC_NODUMP; 2729 if (trunc_page(high) != 2730 ~(vm_paddr_t)PAGE_MASK) { 2731 m_new = vm_page_alloc_contig( 2732 NULL, 0, req, 1, 2733 round_page(high), 2734 ~(vm_paddr_t)0, 2735 PAGE_SIZE, 0, 2736 VM_MEMATTR_DEFAULT); 2737 } else 2738 m_new = NULL; 2739 if (m_new == NULL) { 2740 pa = VM_PAGE_TO_PHYS(m_run); 2741 m_new = vm_page_alloc_contig( 2742 NULL, 0, req, 1, 2743 0, pa - 1, PAGE_SIZE, 0, 2744 VM_MEMATTR_DEFAULT); 2745 } 2746 if (m_new == NULL) { 2747 pa += ptoa(npages); 2748 m_new = vm_page_alloc_contig( 2749 NULL, 0, req, 1, 2750 pa, high, PAGE_SIZE, 0, 2751 VM_MEMATTR_DEFAULT); 2752 } 2753 if (m_new == NULL) { 2754 vm_page_xunbusy(m); 2755 error = ENOMEM; 2756 goto unlock; 2757 } 2758 2759 /* 2760 * Unmap the page and check for new 2761 * wirings that may have been acquired 2762 * through a pmap lookup. 2763 */ 2764 if (object->ref_count != 0 && 2765 !vm_page_try_remove_all(m)) { 2766 vm_page_xunbusy(m); 2767 vm_page_free(m_new); 2768 error = EBUSY; 2769 goto unlock; 2770 } 2771 2772 /* 2773 * Replace "m" with the new page. For 2774 * vm_page_replace(), "m" must be busy 2775 * and dequeued. Finally, change "m" 2776 * as if vm_page_free() was called. 2777 */ 2778 m_new->a.flags = m->a.flags & 2779 ~PGA_QUEUE_STATE_MASK; 2780 KASSERT(m_new->oflags == VPO_UNMANAGED, 2781 ("page %p is managed", m_new)); 2782 m_new->oflags = 0; 2783 pmap_copy_page(m, m_new); 2784 m_new->valid = m->valid; 2785 m_new->dirty = m->dirty; 2786 m->flags &= ~PG_ZERO; 2787 vm_page_dequeue(m); 2788 if (vm_page_replace_hold(m_new, object, 2789 m->pindex, m) && 2790 vm_page_free_prep(m)) 2791 SLIST_INSERT_HEAD(&free, m, 2792 plinks.s.ss); 2793 2794 /* 2795 * The new page must be deactivated 2796 * before the object is unlocked. 2797 */ 2798 vm_page_deactivate(m_new); 2799 } else { 2800 m->flags &= ~PG_ZERO; 2801 vm_page_dequeue(m); 2802 if (vm_page_free_prep(m)) 2803 SLIST_INSERT_HEAD(&free, m, 2804 plinks.s.ss); 2805 KASSERT(m->dirty == 0, 2806 ("page %p is dirty", m)); 2807 } 2808 } else 2809 error = EBUSY; 2810 unlock: 2811 VM_OBJECT_WUNLOCK(object); 2812 } else { 2813 MPASS(vm_phys_domain(m) == domain); 2814 vmd = VM_DOMAIN(domain); 2815 vm_domain_free_lock(vmd); 2816 order = m->order; 2817 if (order < VM_NFREEORDER) { 2818 /* 2819 * The page is enqueued in the physical memory 2820 * allocator's free page queues. Moreover, it 2821 * is the first page in a power-of-two-sized 2822 * run of contiguous free pages. Jump ahead 2823 * to the last page within that run, and 2824 * continue from there. 
2825 */ 2826 m += (1 << order) - 1; 2827 } 2828 #if VM_NRESERVLEVEL > 0 2829 else if (vm_reserv_is_page_free(m)) 2830 order = 0; 2831 #endif 2832 vm_domain_free_unlock(vmd); 2833 if (order == VM_NFREEORDER) 2834 error = EINVAL; 2835 } 2836 } 2837 if ((m = SLIST_FIRST(&free)) != NULL) { 2838 int cnt; 2839 2840 vmd = VM_DOMAIN(domain); 2841 cnt = 0; 2842 vm_domain_free_lock(vmd); 2843 do { 2844 MPASS(vm_phys_domain(m) == domain); 2845 SLIST_REMOVE_HEAD(&free, plinks.s.ss); 2846 vm_phys_free_pages(m, 0); 2847 cnt++; 2848 } while ((m = SLIST_FIRST(&free)) != NULL); 2849 vm_domain_free_unlock(vmd); 2850 vm_domain_freecnt_inc(vmd, cnt); 2851 } 2852 return (error); 2853 } 2854 2855 #define NRUNS 16 2856 2857 CTASSERT(powerof2(NRUNS)); 2858 2859 #define RUN_INDEX(count) ((count) & (NRUNS - 1)) 2860 2861 #define MIN_RECLAIM 8 2862 2863 /* 2864 * vm_page_reclaim_contig: 2865 * 2866 * Reclaim allocated, contiguous physical memory satisfying the specified 2867 * conditions by relocating the virtual pages using that physical memory. 2868 * Returns true if reclamation is successful and false otherwise. Since 2869 * relocation requires the allocation of physical pages, reclamation may 2870 * fail due to a shortage of free pages. When reclamation fails, callers 2871 * are expected to perform vm_wait() before retrying a failed allocation 2872 * operation, e.g., vm_page_alloc_contig(). 2873 * 2874 * The caller must always specify an allocation class through "req". 2875 * 2876 * allocation classes: 2877 * VM_ALLOC_NORMAL normal process request 2878 * VM_ALLOC_SYSTEM system *really* needs a page 2879 * VM_ALLOC_INTERRUPT interrupt time request 2880 * 2881 * The optional allocation flags are ignored. 2882 * 2883 * "npages" must be greater than zero. Both "alignment" and "boundary" 2884 * must be a power of two. 2885 */ 2886 bool 2887 vm_page_reclaim_contig_domain(int domain, int req, u_long npages, 2888 vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 2889 { 2890 struct vm_domain *vmd; 2891 vm_paddr_t curr_low; 2892 vm_page_t m_run, m_runs[NRUNS]; 2893 u_long count, reclaimed; 2894 int error, i, options, req_class; 2895 2896 KASSERT(npages > 0, ("npages is 0")); 2897 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 2898 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 2899 req_class = req & VM_ALLOC_CLASS_MASK; 2900 2901 /* 2902 * The page daemon is allowed to dig deeper into the free page list. 2903 */ 2904 if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) 2905 req_class = VM_ALLOC_SYSTEM; 2906 2907 /* 2908 * Return if the number of free pages cannot satisfy the requested 2909 * allocation. 2910 */ 2911 vmd = VM_DOMAIN(domain); 2912 count = vmd->vmd_free_count; 2913 if (count < npages + vmd->vmd_free_reserved || (count < npages + 2914 vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || 2915 (count < npages && req_class == VM_ALLOC_INTERRUPT)) 2916 return (false); 2917 2918 /* 2919 * Scan up to three times, relaxing the restrictions ("options") on 2920 * the reclamation of reservations and superpages each time. 2921 */ 2922 for (options = VPSC_NORESERV;;) { 2923 /* 2924 * Find the highest runs that satisfy the given constraints 2925 * and restrictions, and record them in "m_runs". 
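 * Note that "m_runs" is used as a ring buffer of NRUNS entries, so if
 * more than NRUNS runs are found, only the most recently discovered,
 * and therefore highest-addressed, runs are kept.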
2926 */ 2927 curr_low = low; 2928 count = 0; 2929 for (;;) { 2930 m_run = vm_phys_scan_contig(domain, npages, curr_low, 2931 high, alignment, boundary, options); 2932 if (m_run == NULL) 2933 break; 2934 curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); 2935 m_runs[RUN_INDEX(count)] = m_run; 2936 count++; 2937 } 2938 2939 /* 2940 * Reclaim the highest runs in LIFO (descending) order until 2941 * the number of reclaimed pages, "reclaimed", is at least 2942 * MIN_RECLAIM. Reset "reclaimed" each time because each 2943 * reclamation is idempotent, and runs will (likely) recur 2944 * from one scan to the next as restrictions are relaxed. 2945 */ 2946 reclaimed = 0; 2947 for (i = 0; count > 0 && i < NRUNS; i++) { 2948 count--; 2949 m_run = m_runs[RUN_INDEX(count)]; 2950 error = vm_page_reclaim_run(req_class, domain, npages, 2951 m_run, high); 2952 if (error == 0) { 2953 reclaimed += npages; 2954 if (reclaimed >= MIN_RECLAIM) 2955 return (true); 2956 } 2957 } 2958 2959 /* 2960 * Either relax the restrictions on the next scan or return if 2961 * the last scan had no restrictions. 2962 */ 2963 if (options == VPSC_NORESERV) 2964 options = VPSC_NOSUPER; 2965 else if (options == VPSC_NOSUPER) 2966 options = VPSC_ANY; 2967 else if (options == VPSC_ANY) 2968 return (reclaimed != 0); 2969 } 2970 } 2971 2972 bool 2973 vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, 2974 u_long alignment, vm_paddr_t boundary) 2975 { 2976 struct vm_domainset_iter di; 2977 int domain; 2978 bool ret; 2979 2980 vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req); 2981 do { 2982 ret = vm_page_reclaim_contig_domain(domain, req, npages, low, 2983 high, alignment, boundary); 2984 if (ret) 2985 break; 2986 } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); 2987 2988 return (ret); 2989 } 2990 2991 /* 2992 * Set the domain in the appropriate page level domainset. 2993 */ 2994 void 2995 vm_domain_set(struct vm_domain *vmd) 2996 { 2997 2998 mtx_lock(&vm_domainset_lock); 2999 if (!vmd->vmd_minset && vm_paging_min(vmd)) { 3000 vmd->vmd_minset = 1; 3001 DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains); 3002 } 3003 if (!vmd->vmd_severeset && vm_paging_severe(vmd)) { 3004 vmd->vmd_severeset = 1; 3005 DOMAINSET_SET(vmd->vmd_domain, &vm_severe_domains); 3006 } 3007 mtx_unlock(&vm_domainset_lock); 3008 } 3009 3010 /* 3011 * Clear the domain from the appropriate page level domainset. 3012 */ 3013 void 3014 vm_domain_clear(struct vm_domain *vmd) 3015 { 3016 3017 mtx_lock(&vm_domainset_lock); 3018 if (vmd->vmd_minset && !vm_paging_min(vmd)) { 3019 vmd->vmd_minset = 0; 3020 DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains); 3021 if (vm_min_waiters != 0) { 3022 vm_min_waiters = 0; 3023 wakeup(&vm_min_domains); 3024 } 3025 } 3026 if (vmd->vmd_severeset && !vm_paging_severe(vmd)) { 3027 vmd->vmd_severeset = 0; 3028 DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains); 3029 if (vm_severe_waiters != 0) { 3030 vm_severe_waiters = 0; 3031 wakeup(&vm_severe_domains); 3032 } 3033 } 3034 3035 /* 3036 * If pageout daemon needs pages, then tell it that there are 3037 * some free. 3038 */ 3039 if (vmd->vmd_pageout_pages_needed && 3040 vmd->vmd_free_count >= vmd->vmd_pageout_free_min) { 3041 wakeup(&vmd->vmd_pageout_pages_needed); 3042 vmd->vmd_pageout_pages_needed = 0; 3043 } 3044 3045 /* See comments in vm_wait_doms(). 
*/
3046 if (vm_pageproc_waiters) {
3047 vm_pageproc_waiters = 0;
3048 wakeup(&vm_pageproc_waiters);
3049 }
3050 mtx_unlock(&vm_domainset_lock);
3051 }
3052
3053 /*
3054 * Wait for free pages to exceed the min threshold globally.
3055 */
3056 void
3057 vm_wait_min(void)
3058 {
3059
3060 mtx_lock(&vm_domainset_lock);
3061 while (vm_page_count_min()) {
3062 vm_min_waiters++;
3063 msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
3064 }
3065 mtx_unlock(&vm_domainset_lock);
3066 }
3067
3068 /*
3069 * Wait for free pages to exceed the severe threshold globally.
3070 */
3071 void
3072 vm_wait_severe(void)
3073 {
3074
3075 mtx_lock(&vm_domainset_lock);
3076 while (vm_page_count_severe()) {
3077 vm_severe_waiters++;
3078 msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
3079 "vmwait", 0);
3080 }
3081 mtx_unlock(&vm_domainset_lock);
3082 }
3083
3084 u_int
3085 vm_wait_count(void)
3086 {
3087
3088 return (vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters);
3089 }
3090
3091 void
3092 vm_wait_doms(const domainset_t *wdoms)
3093 {
3094
3095 /*
3096 * We use racy wakeup synchronization to avoid expensive global
3097 * locking for the pageproc when sleeping with a non-specific vm_wait.
3098 * To handle this, we only sleep for one tick in this instance. It
3099 * is expected that most allocations for the pageproc will come from
3100 * kmem or vm_page_grab* which will use the more specific and
3101 * race-free vm_wait_domain().
3102 */
3103 if (curproc == pageproc) {
3104 mtx_lock(&vm_domainset_lock);
3105 vm_pageproc_waiters++;
3106 msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM | PDROP,
3107 "pageprocwait", 1);
3108 } else {
3109 /*
3110 * XXX Ideally we would wait only until the allocation could
3111 * be satisfied. This condition can cause new allocators to
3112 * consume all freed pages while old allocators wait.
3113 */
3114 mtx_lock(&vm_domainset_lock);
3115 if (vm_page_count_min_set(wdoms)) {
3116 vm_min_waiters++;
3117 msleep(&vm_min_domains, &vm_domainset_lock,
3118 PVM | PDROP, "vmwait", 0);
3119 } else
3120 mtx_unlock(&vm_domainset_lock);
3121 }
3122 }
3123
3124 /*
3125 * vm_wait_domain:
3126 *
3127 * Sleep until free pages are available for allocation.
3128 * - Called in various places after failed memory allocations.
3129 */
3130 void
3131 vm_wait_domain(int domain)
3132 {
3133 struct vm_domain *vmd;
3134 domainset_t wdom;
3135
3136 vmd = VM_DOMAIN(domain);
3137 vm_domain_free_assert_unlocked(vmd);
3138
3139 if (curproc == pageproc) {
3140 mtx_lock(&vm_domainset_lock);
3141 if (vmd->vmd_free_count < vmd->vmd_pageout_free_min) {
3142 vmd->vmd_pageout_pages_needed = 1;
3143 msleep(&vmd->vmd_pageout_pages_needed,
3144 &vm_domainset_lock, PDROP | PSWP, "VMWait", 0);
3145 } else
3146 mtx_unlock(&vm_domainset_lock);
3147 } else {
3148 if (pageproc == NULL)
3149 panic("vm_wait in early boot");
3150 DOMAINSET_ZERO(&wdom);
3151 DOMAINSET_SET(vmd->vmd_domain, &wdom);
3152 vm_wait_doms(&wdom);
3153 }
3154 }
3155
3156 /*
3157 * vm_wait:
3158 *
3159 * Sleep until free pages are available for allocation in the
3160 * affinity domains of the obj. If obj is NULL, the domain set
3161 * for the calling thread is used.
3162 * Called in various places after failed memory allocations.
3163 */
3164 void
3165 vm_wait(vm_object_t obj)
3166 {
3167 struct domainset *d;
3168
3169 d = NULL;
3170
3171 /*
3172 * Carefully fetch pointers only once: the struct domainset
3173 * itself is immutable but the pointer might change.
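 * In particular, the object's domain policy pointer may be updated
 * concurrently, so it is loaded into a local variable before being
 * tested and dereferenced.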
3174 */ 3175 if (obj != NULL) 3176 d = obj->domain.dr_policy; 3177 if (d == NULL) 3178 d = curthread->td_domain.dr_policy; 3179 3180 vm_wait_doms(&d->ds_mask); 3181 } 3182 3183 /* 3184 * vm_domain_alloc_fail: 3185 * 3186 * Called when a page allocation function fails. Informs the 3187 * pagedaemon and performs the requested wait. Requires the 3188 * domain_free and object lock on entry. Returns with the 3189 * object lock held and free lock released. Returns an error when 3190 * retry is necessary. 3191 * 3192 */ 3193 static int 3194 vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req) 3195 { 3196 3197 vm_domain_free_assert_unlocked(vmd); 3198 3199 atomic_add_int(&vmd->vmd_pageout_deficit, 3200 max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); 3201 if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) { 3202 if (object != NULL) 3203 VM_OBJECT_WUNLOCK(object); 3204 vm_wait_domain(vmd->vmd_domain); 3205 if (object != NULL) 3206 VM_OBJECT_WLOCK(object); 3207 if (req & VM_ALLOC_WAITOK) 3208 return (EAGAIN); 3209 } 3210 3211 return (0); 3212 } 3213 3214 /* 3215 * vm_waitpfault: 3216 * 3217 * Sleep until free pages are available for allocation. 3218 * - Called only in vm_fault so that processes page faulting 3219 * can be easily tracked. 3220 * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing 3221 * processes will be able to grab memory first. Do not change 3222 * this balance without careful testing first. 3223 */ 3224 void 3225 vm_waitpfault(struct domainset *dset, int timo) 3226 { 3227 3228 /* 3229 * XXX Ideally we would wait only until the allocation could 3230 * be satisfied. This condition can cause new allocators to 3231 * consume all freed pages while old allocators wait. 3232 */ 3233 mtx_lock(&vm_domainset_lock); 3234 if (vm_page_count_min_set(&dset->ds_mask)) { 3235 vm_min_waiters++; 3236 msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP, 3237 "pfault", timo); 3238 } else 3239 mtx_unlock(&vm_domainset_lock); 3240 } 3241 3242 static struct vm_pagequeue * 3243 _vm_page_pagequeue(vm_page_t m, uint8_t queue) 3244 { 3245 3246 return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]); 3247 } 3248 3249 #ifdef INVARIANTS 3250 static struct vm_pagequeue * 3251 vm_page_pagequeue(vm_page_t m) 3252 { 3253 3254 return (_vm_page_pagequeue(m, vm_page_astate_load(m).queue)); 3255 } 3256 #endif 3257 3258 static __always_inline bool 3259 vm_page_pqstate_fcmpset(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) 3260 { 3261 vm_page_astate_t tmp; 3262 3263 tmp = *old; 3264 do { 3265 if (__predict_true(vm_page_astate_fcmpset(m, old, new))) 3266 return (true); 3267 counter_u64_add(pqstate_commit_retries, 1); 3268 } while (old->_bits == tmp._bits); 3269 3270 return (false); 3271 } 3272 3273 /* 3274 * Do the work of committing a queue state update that moves the page out of 3275 * its current queue. 3276 */ 3277 static bool 3278 _vm_page_pqstate_commit_dequeue(struct vm_pagequeue *pq, vm_page_t m, 3279 vm_page_astate_t *old, vm_page_astate_t new) 3280 { 3281 vm_page_t next; 3282 3283 vm_pagequeue_assert_locked(pq); 3284 KASSERT(vm_page_pagequeue(m) == pq, 3285 ("%s: queue %p does not match page %p", __func__, pq, m)); 3286 KASSERT(old->queue != PQ_NONE && new.queue != old->queue, 3287 ("%s: invalid queue indices %d %d", 3288 __func__, old->queue, new.queue)); 3289 3290 /* 3291 * Once the queue index of the page changes there is nothing 3292 * synchronizing with further updates to the page's physical 3293 * queue state. 
Therefore we must speculatively remove the page 3294 * from the queue now and be prepared to roll back if the queue 3295 * state update fails. If the page is not physically enqueued then 3296 * we just update its queue index. 3297 */ 3298 if ((old->flags & PGA_ENQUEUED) != 0) { 3299 new.flags &= ~PGA_ENQUEUED; 3300 next = TAILQ_NEXT(m, plinks.q); 3301 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 3302 vm_pagequeue_cnt_dec(pq); 3303 if (!vm_page_pqstate_fcmpset(m, old, new)) { 3304 if (next == NULL) 3305 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 3306 else 3307 TAILQ_INSERT_BEFORE(next, m, plinks.q); 3308 vm_pagequeue_cnt_inc(pq); 3309 return (false); 3310 } else { 3311 return (true); 3312 } 3313 } else { 3314 return (vm_page_pqstate_fcmpset(m, old, new)); 3315 } 3316 } 3317 3318 static bool 3319 vm_page_pqstate_commit_dequeue(vm_page_t m, vm_page_astate_t *old, 3320 vm_page_astate_t new) 3321 { 3322 struct vm_pagequeue *pq; 3323 vm_page_astate_t as; 3324 bool ret; 3325 3326 pq = _vm_page_pagequeue(m, old->queue); 3327 3328 /* 3329 * The queue field and PGA_ENQUEUED flag are stable only so long as the 3330 * corresponding page queue lock is held. 3331 */ 3332 vm_pagequeue_lock(pq); 3333 as = vm_page_astate_load(m); 3334 if (__predict_false(as._bits != old->_bits)) { 3335 *old = as; 3336 ret = false; 3337 } else { 3338 ret = _vm_page_pqstate_commit_dequeue(pq, m, old, new); 3339 } 3340 vm_pagequeue_unlock(pq); 3341 return (ret); 3342 } 3343 3344 /* 3345 * Commit a queue state update that enqueues or requeues a page. 3346 */ 3347 static bool 3348 _vm_page_pqstate_commit_requeue(struct vm_pagequeue *pq, vm_page_t m, 3349 vm_page_astate_t *old, vm_page_astate_t new) 3350 { 3351 struct vm_domain *vmd; 3352 3353 vm_pagequeue_assert_locked(pq); 3354 KASSERT(old->queue != PQ_NONE && new.queue == old->queue, 3355 ("%s: invalid queue indices %d %d", 3356 __func__, old->queue, new.queue)); 3357 3358 new.flags |= PGA_ENQUEUED; 3359 if (!vm_page_pqstate_fcmpset(m, old, new)) 3360 return (false); 3361 3362 if ((old->flags & PGA_ENQUEUED) != 0) 3363 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); 3364 else 3365 vm_pagequeue_cnt_inc(pq); 3366 3367 /* 3368 * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE. In particular, if 3369 * both flags are set in close succession, only PGA_REQUEUE_HEAD will be 3370 * applied, even if it was set first. 3371 */ 3372 if ((old->flags & PGA_REQUEUE_HEAD) != 0) { 3373 vmd = vm_pagequeue_domain(m); 3374 KASSERT(pq == &vmd->vmd_pagequeues[PQ_INACTIVE], 3375 ("%s: invalid page queue for page %p", __func__, m)); 3376 TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); 3377 } else { 3378 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); 3379 } 3380 return (true); 3381 } 3382 3383 /* 3384 * Commit a queue state update that encodes a request for a deferred queue 3385 * operation. 3386 */ 3387 static bool 3388 vm_page_pqstate_commit_request(vm_page_t m, vm_page_astate_t *old, 3389 vm_page_astate_t new) 3390 { 3391 3392 KASSERT(old->queue == new.queue || new.queue != PQ_NONE, 3393 ("%s: invalid state, queue %d flags %x", 3394 __func__, new.queue, new.flags)); 3395 3396 if (old->_bits != new._bits && 3397 !vm_page_pqstate_fcmpset(m, old, new)) 3398 return (false); 3399 vm_page_pqbatch_submit(m, new.queue); 3400 return (true); 3401 } 3402 3403 /* 3404 * A generic queue state update function. This handles more cases than the 3405 * specialized functions above. 
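 *
 * A typical caller loads the page's atomic state, derives the desired
 * new state, and retries until the commit succeeds, for example (an
 * illustrative sketch of the pattern used by callers in this file):
 *
 *	old = vm_page_astate_load(m);
 *	do {
 *		new = old;
 *		new.flags |= PGA_REQUEUE;
 *		new.queue = PQ_INACTIVE;
 *	} while (!vm_page_pqstate_commit(m, &old, new));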
3406 */ 3407 bool 3408 vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) 3409 { 3410 3411 if (old->_bits == new._bits) 3412 return (true); 3413 3414 if (old->queue != PQ_NONE && new.queue != old->queue) { 3415 if (!vm_page_pqstate_commit_dequeue(m, old, new)) 3416 return (false); 3417 if (new.queue != PQ_NONE) 3418 vm_page_pqbatch_submit(m, new.queue); 3419 } else { 3420 if (!vm_page_pqstate_fcmpset(m, old, new)) 3421 return (false); 3422 if (new.queue != PQ_NONE && 3423 ((new.flags & ~old->flags) & PGA_QUEUE_OP_MASK) != 0) 3424 vm_page_pqbatch_submit(m, new.queue); 3425 } 3426 return (true); 3427 } 3428 3429 /* 3430 * Apply deferred queue state updates to a page. 3431 */ 3432 static inline void 3433 vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue) 3434 { 3435 vm_page_astate_t new, old; 3436 3437 CRITICAL_ASSERT(curthread); 3438 vm_pagequeue_assert_locked(pq); 3439 KASSERT(queue < PQ_COUNT, 3440 ("%s: invalid queue index %d", __func__, queue)); 3441 KASSERT(pq == _vm_page_pagequeue(m, queue), 3442 ("%s: page %p does not belong to queue %p", __func__, m, pq)); 3443 3444 for (old = vm_page_astate_load(m);;) { 3445 if (__predict_false(old.queue != queue || 3446 (old.flags & PGA_QUEUE_OP_MASK) == 0)) { 3447 counter_u64_add(queue_nops, 1); 3448 break; 3449 } 3450 KASSERT(old.queue != PQ_NONE || (old.flags & PGA_QUEUE_STATE_MASK) == 0, 3451 ("%s: page %p has unexpected queue state", __func__, m)); 3452 3453 new = old; 3454 if ((old.flags & PGA_DEQUEUE) != 0) { 3455 new.flags &= ~PGA_QUEUE_OP_MASK; 3456 new.queue = PQ_NONE; 3457 if (__predict_true(_vm_page_pqstate_commit_dequeue(pq, 3458 m, &old, new))) { 3459 counter_u64_add(queue_ops, 1); 3460 break; 3461 } 3462 } else { 3463 new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD); 3464 if (__predict_true(_vm_page_pqstate_commit_requeue(pq, 3465 m, &old, new))) { 3466 counter_u64_add(queue_ops, 1); 3467 break; 3468 } 3469 } 3470 } 3471 } 3472 3473 static void 3474 vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq, 3475 uint8_t queue) 3476 { 3477 int i; 3478 3479 for (i = 0; i < bq->bq_cnt; i++) 3480 vm_pqbatch_process_page(pq, bq->bq_pa[i], queue); 3481 vm_batchqueue_init(bq); 3482 } 3483 3484 /* 3485 * vm_page_pqbatch_submit: [ internal use only ] 3486 * 3487 * Enqueue a page in the specified page queue's batched work queue. 3488 * The caller must have encoded the requested operation in the page 3489 * structure's a.flags field. 3490 */ 3491 void 3492 vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) 3493 { 3494 struct vm_batchqueue *bq; 3495 struct vm_pagequeue *pq; 3496 int domain; 3497 3498 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3499 ("page %p is unmanaged", m)); 3500 KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); 3501 3502 domain = vm_phys_domain(m); 3503 pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue]; 3504 3505 critical_enter(); 3506 bq = DPCPU_PTR(pqbatch[domain][queue]); 3507 if (vm_batchqueue_insert(bq, m)) { 3508 critical_exit(); 3509 return; 3510 } 3511 critical_exit(); 3512 vm_pagequeue_lock(pq); 3513 critical_enter(); 3514 bq = DPCPU_PTR(pqbatch[domain][queue]); 3515 vm_pqbatch_process(pq, bq, queue); 3516 vm_pqbatch_process_page(pq, m, queue); 3517 vm_pagequeue_unlock(pq); 3518 critical_exit(); 3519 } 3520 3521 /* 3522 * vm_page_pqbatch_drain: [ internal use only ] 3523 * 3524 * Force all per-CPU page queue batch queues to be drained. 
This is 3525 * intended for use in severe memory shortages, to ensure that pages 3526 * do not remain stuck in the batch queues. 3527 */ 3528 void 3529 vm_page_pqbatch_drain(void) 3530 { 3531 struct thread *td; 3532 struct vm_domain *vmd; 3533 struct vm_pagequeue *pq; 3534 int cpu, domain, queue; 3535 3536 td = curthread; 3537 CPU_FOREACH(cpu) { 3538 thread_lock(td); 3539 sched_bind(td, cpu); 3540 thread_unlock(td); 3541 3542 for (domain = 0; domain < vm_ndomains; domain++) { 3543 vmd = VM_DOMAIN(domain); 3544 for (queue = 0; queue < PQ_COUNT; queue++) { 3545 pq = &vmd->vmd_pagequeues[queue]; 3546 vm_pagequeue_lock(pq); 3547 critical_enter(); 3548 vm_pqbatch_process(pq, 3549 DPCPU_PTR(pqbatch[domain][queue]), queue); 3550 critical_exit(); 3551 vm_pagequeue_unlock(pq); 3552 } 3553 } 3554 } 3555 thread_lock(td); 3556 sched_unbind(td); 3557 thread_unlock(td); 3558 } 3559 3560 /* 3561 * vm_page_dequeue_deferred: [ internal use only ] 3562 * 3563 * Request removal of the given page from its current page 3564 * queue. Physical removal from the queue may be deferred 3565 * indefinitely. 3566 * 3567 * The page must be locked. 3568 */ 3569 void 3570 vm_page_dequeue_deferred(vm_page_t m) 3571 { 3572 vm_page_astate_t new, old; 3573 3574 old = vm_page_astate_load(m); 3575 do { 3576 if (old.queue == PQ_NONE) { 3577 KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, 3578 ("%s: page %p has unexpected queue state", 3579 __func__, m)); 3580 break; 3581 } 3582 new = old; 3583 new.flags |= PGA_DEQUEUE; 3584 } while (!vm_page_pqstate_commit_request(m, &old, new)); 3585 } 3586 3587 /* 3588 * vm_page_dequeue: 3589 * 3590 * Remove the page from whichever page queue it's in, if any, before 3591 * returning. 3592 */ 3593 void 3594 vm_page_dequeue(vm_page_t m) 3595 { 3596 vm_page_astate_t new, old; 3597 3598 old = vm_page_astate_load(m); 3599 do { 3600 if (old.queue == PQ_NONE) { 3601 KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, 3602 ("%s: page %p has unexpected queue state", 3603 __func__, m)); 3604 break; 3605 } 3606 new = old; 3607 new.flags &= ~PGA_QUEUE_OP_MASK; 3608 new.queue = PQ_NONE; 3609 } while (!vm_page_pqstate_commit_dequeue(m, &old, new)); 3610 3611 } 3612 3613 /* 3614 * Schedule the given page for insertion into the specified page queue. 3615 * Physical insertion of the page may be deferred indefinitely. 3616 */ 3617 static void 3618 vm_page_enqueue(vm_page_t m, uint8_t queue) 3619 { 3620 3621 KASSERT(m->a.queue == PQ_NONE && 3622 (m->a.flags & PGA_QUEUE_STATE_MASK) == 0, 3623 ("%s: page %p is already enqueued", __func__, m)); 3624 KASSERT(m->ref_count > 0, 3625 ("%s: page %p does not carry any references", __func__, m)); 3626 3627 m->a.queue = queue; 3628 if ((m->a.flags & PGA_REQUEUE) == 0) 3629 vm_page_aflag_set(m, PGA_REQUEUE); 3630 vm_page_pqbatch_submit(m, queue); 3631 } 3632 3633 /* 3634 * vm_page_free_prep: 3635 * 3636 * Prepares the given page to be put on the free list, 3637 * disassociating it from any VM object. The caller may return 3638 * the page to the free list only if this function returns true. 3639 * 3640 * The object must be locked. The page must be locked if it is 3641 * managed. 3642 */ 3643 static bool 3644 vm_page_free_prep(vm_page_t m) 3645 { 3646 3647 /* 3648 * Synchronize with threads that have dropped a reference to this 3649 * page. 
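 * The acquire fence below pairs with the release store issued when the
 * page's final reference was dropped (see vm_page_unwire_managed()),
 * ensuring that all prior updates to the page are visible here.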
3650 */ 3651 atomic_thread_fence_acq(); 3652 3653 #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) 3654 if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { 3655 uint64_t *p; 3656 int i; 3657 p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3658 for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++) 3659 KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx", 3660 m, i, (uintmax_t)*p)); 3661 } 3662 #endif 3663 if ((m->oflags & VPO_UNMANAGED) == 0) { 3664 KASSERT(!pmap_page_is_mapped(m), 3665 ("vm_page_free_prep: freeing mapped page %p", m)); 3666 KASSERT((m->a.flags & (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0, 3667 ("vm_page_free_prep: mapping flags set in page %p", m)); 3668 } else { 3669 KASSERT(m->a.queue == PQ_NONE, 3670 ("vm_page_free_prep: unmanaged page %p is queued", m)); 3671 } 3672 VM_CNT_INC(v_tfree); 3673 3674 if (m->object != NULL) { 3675 KASSERT(((m->oflags & VPO_UNMANAGED) != 0) == 3676 ((m->object->flags & OBJ_UNMANAGED) != 0), 3677 ("vm_page_free_prep: managed flag mismatch for page %p", 3678 m)); 3679 vm_page_assert_xbusied(m); 3680 3681 /* 3682 * The object reference can be released without an atomic 3683 * operation. 3684 */ 3685 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 3686 m->ref_count == VPRC_OBJREF, 3687 ("vm_page_free_prep: page %p has unexpected ref_count %u", 3688 m, m->ref_count)); 3689 vm_page_object_remove(m); 3690 m->object = NULL; 3691 m->ref_count -= VPRC_OBJREF; 3692 } else 3693 vm_page_assert_unbusied(m); 3694 3695 vm_page_busy_free(m); 3696 3697 /* 3698 * If fictitious remove object association and 3699 * return. 3700 */ 3701 if ((m->flags & PG_FICTITIOUS) != 0) { 3702 KASSERT(m->ref_count == 1, 3703 ("fictitious page %p is referenced", m)); 3704 KASSERT(m->a.queue == PQ_NONE, 3705 ("fictitious page %p is queued", m)); 3706 return (false); 3707 } 3708 3709 /* 3710 * Pages need not be dequeued before they are returned to the physical 3711 * memory allocator, but they must at least be marked for a deferred 3712 * dequeue. 3713 */ 3714 if ((m->oflags & VPO_UNMANAGED) == 0) 3715 vm_page_dequeue_deferred(m); 3716 3717 m->valid = 0; 3718 vm_page_undirty(m); 3719 3720 if (m->ref_count != 0) 3721 panic("vm_page_free_prep: page %p has references", m); 3722 3723 /* 3724 * Restore the default memory attribute to the page. 3725 */ 3726 if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) 3727 pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); 3728 3729 #if VM_NRESERVLEVEL > 0 3730 /* 3731 * Determine whether the page belongs to a reservation. If the page was 3732 * allocated from a per-CPU cache, it cannot belong to a reservation, so 3733 * as an optimization, we avoid the check in that case. 3734 */ 3735 if ((m->flags & PG_PCPU_CACHE) == 0 && vm_reserv_free_page(m)) 3736 return (false); 3737 #endif 3738 3739 return (true); 3740 } 3741 3742 /* 3743 * vm_page_free_toq: 3744 * 3745 * Returns the given page to the free list, disassociating it 3746 * from any VM object. 3747 * 3748 * The object must be locked. The page must be locked if it is 3749 * managed. 
3750 */ 3751 static void 3752 vm_page_free_toq(vm_page_t m) 3753 { 3754 struct vm_domain *vmd; 3755 uma_zone_t zone; 3756 3757 if (!vm_page_free_prep(m)) 3758 return; 3759 3760 vmd = vm_pagequeue_domain(m); 3761 zone = vmd->vmd_pgcache[m->pool].zone; 3762 if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) { 3763 uma_zfree(zone, m); 3764 return; 3765 } 3766 vm_domain_free_lock(vmd); 3767 vm_phys_free_pages(m, 0); 3768 vm_domain_free_unlock(vmd); 3769 vm_domain_freecnt_inc(vmd, 1); 3770 } 3771 3772 /* 3773 * vm_page_free_pages_toq: 3774 * 3775 * Returns a list of pages to the free list, disassociating it 3776 * from any VM object. In other words, this is equivalent to 3777 * calling vm_page_free_toq() for each page of a list of VM objects. 3778 * 3779 * The objects must be locked. The pages must be locked if it is 3780 * managed. 3781 */ 3782 void 3783 vm_page_free_pages_toq(struct spglist *free, bool update_wire_count) 3784 { 3785 vm_page_t m; 3786 int count; 3787 3788 if (SLIST_EMPTY(free)) 3789 return; 3790 3791 count = 0; 3792 while ((m = SLIST_FIRST(free)) != NULL) { 3793 count++; 3794 SLIST_REMOVE_HEAD(free, plinks.s.ss); 3795 vm_page_free_toq(m); 3796 } 3797 3798 if (update_wire_count) 3799 vm_wire_sub(count); 3800 } 3801 3802 /* 3803 * Mark this page as wired down, preventing reclamation by the page daemon 3804 * or when the containing object is destroyed. 3805 */ 3806 void 3807 vm_page_wire(vm_page_t m) 3808 { 3809 u_int old; 3810 3811 KASSERT(m->object != NULL, 3812 ("vm_page_wire: page %p does not belong to an object", m)); 3813 if (!vm_page_busied(m) && !vm_object_busied(m->object)) 3814 VM_OBJECT_ASSERT_LOCKED(m->object); 3815 KASSERT((m->flags & PG_FICTITIOUS) == 0 || 3816 VPRC_WIRE_COUNT(m->ref_count) >= 1, 3817 ("vm_page_wire: fictitious page %p has zero wirings", m)); 3818 3819 old = atomic_fetchadd_int(&m->ref_count, 1); 3820 KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, 3821 ("vm_page_wire: counter overflow for page %p", m)); 3822 if (VPRC_WIRE_COUNT(old) == 0) { 3823 if ((m->oflags & VPO_UNMANAGED) == 0) 3824 vm_page_aflag_set(m, PGA_DEQUEUE); 3825 vm_wire_add(1); 3826 } 3827 } 3828 3829 /* 3830 * Attempt to wire a mapped page following a pmap lookup of that page. 3831 * This may fail if a thread is concurrently tearing down mappings of the page. 3832 * The transient failure is acceptable because it translates to the 3833 * failure of the caller pmap_extract_and_hold(), which should be then 3834 * followed by the vm_fault() fallback, see e.g. vm_fault_quick_hold_pages(). 3835 */ 3836 bool 3837 vm_page_wire_mapped(vm_page_t m) 3838 { 3839 u_int old; 3840 3841 old = m->ref_count; 3842 do { 3843 KASSERT(old > 0, 3844 ("vm_page_wire_mapped: wiring unreferenced page %p", m)); 3845 if ((old & VPRC_BLOCKED) != 0) 3846 return (false); 3847 } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); 3848 3849 if (VPRC_WIRE_COUNT(old) == 0) { 3850 if ((m->oflags & VPO_UNMANAGED) == 0) 3851 vm_page_aflag_set(m, PGA_DEQUEUE); 3852 vm_wire_add(1); 3853 } 3854 return (true); 3855 } 3856 3857 /* 3858 * Release a wiring reference to a managed page. If the page still belongs to 3859 * an object, update its position in the page queues to reflect the reference. 3860 * If the wiring was the last reference to the page, free the page. 
3861 */ 3862 static void 3863 vm_page_unwire_managed(vm_page_t m, uint8_t nqueue, bool noreuse) 3864 { 3865 u_int old; 3866 3867 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3868 ("%s: page %p is unmanaged", __func__, m)); 3869 3870 /* 3871 * Update LRU state before releasing the wiring reference. 3872 * Use a release store when updating the reference count to 3873 * synchronize with vm_page_free_prep(). 3874 */ 3875 old = m->ref_count; 3876 do { 3877 KASSERT(VPRC_WIRE_COUNT(old) > 0, 3878 ("vm_page_unwire: wire count underflow for page %p", m)); 3879 3880 if (old > VPRC_OBJREF + 1) { 3881 /* 3882 * The page has at least one other wiring reference. An 3883 * earlier iteration of this loop may have called 3884 * vm_page_release_toq() and cleared PGA_DEQUEUE, so 3885 * re-set it if necessary. 3886 */ 3887 if ((vm_page_astate_load(m).flags & PGA_DEQUEUE) == 0) 3888 vm_page_aflag_set(m, PGA_DEQUEUE); 3889 } else if (old == VPRC_OBJREF + 1) { 3890 /* 3891 * This is the last wiring. Clear PGA_DEQUEUE and 3892 * update the page's queue state to reflect the 3893 * reference. If the page does not belong to an object 3894 * (i.e., the VPRC_OBJREF bit is clear), we only need to 3895 * clear leftover queue state. 3896 */ 3897 vm_page_release_toq(m, nqueue, false); 3898 } else if (old == 1) { 3899 vm_page_aflag_clear(m, PGA_DEQUEUE); 3900 } 3901 } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); 3902 3903 if (VPRC_WIRE_COUNT(old) == 1) { 3904 vm_wire_sub(1); 3905 if (old == 1) 3906 vm_page_free(m); 3907 } 3908 } 3909 3910 /* 3911 * Release one wiring of the specified page, potentially allowing it to be 3912 * paged out. 3913 * 3914 * Only managed pages belonging to an object can be paged out. If the number 3915 * of wirings transitions to zero and the page is eligible for page out, then 3916 * the page is added to the specified paging queue. If the released wiring 3917 * represented the last reference to the page, the page is freed. 3918 * 3919 * A managed page must be locked. 3920 */ 3921 void 3922 vm_page_unwire(vm_page_t m, uint8_t nqueue) 3923 { 3924 3925 KASSERT(nqueue < PQ_COUNT, 3926 ("vm_page_unwire: invalid queue %u request for page %p", 3927 nqueue, m)); 3928 3929 if ((m->oflags & VPO_UNMANAGED) != 0) { 3930 if (vm_page_unwire_noq(m) && m->ref_count == 0) 3931 vm_page_free(m); 3932 return; 3933 } 3934 vm_page_unwire_managed(m, nqueue, false); 3935 } 3936 3937 /* 3938 * Unwire a page without (re-)inserting it into a page queue. It is up 3939 * to the caller to enqueue, requeue, or free the page as appropriate. 3940 * In most cases involving managed pages, vm_page_unwire() should be used 3941 * instead. 3942 */ 3943 bool 3944 vm_page_unwire_noq(vm_page_t m) 3945 { 3946 u_int old; 3947 3948 old = vm_page_drop(m, 1); 3949 KASSERT(VPRC_WIRE_COUNT(old) != 0, 3950 ("vm_page_unref: counter underflow for page %p", m)); 3951 KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1, 3952 ("vm_page_unref: missing ref on fictitious page %p", m)); 3953 3954 if (VPRC_WIRE_COUNT(old) > 1) 3955 return (false); 3956 if ((m->oflags & VPO_UNMANAGED) == 0) 3957 vm_page_aflag_clear(m, PGA_DEQUEUE); 3958 vm_wire_sub(1); 3959 return (true); 3960 } 3961 3962 /* 3963 * Ensure that the page ends up in the specified page queue. If the page is 3964 * active or being moved to the active queue, ensure that its act_count is 3965 * at least ACT_INIT but do not otherwise mess with it. 3966 * 3967 * A managed page must be locked. 
3968 */ 3969 static __always_inline void 3970 vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag) 3971 { 3972 vm_page_astate_t old, new; 3973 3974 KASSERT(m->ref_count > 0, 3975 ("%s: page %p does not carry any references", __func__, m)); 3976 KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD, 3977 ("%s: invalid flags %x", __func__, nflag)); 3978 3979 if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) 3980 return; 3981 3982 old = vm_page_astate_load(m); 3983 do { 3984 if ((old.flags & PGA_DEQUEUE) != 0) 3985 break; 3986 new = old; 3987 new.flags &= ~PGA_QUEUE_OP_MASK; 3988 if (nqueue == PQ_ACTIVE) 3989 new.act_count = max(old.act_count, ACT_INIT); 3990 if (old.queue == nqueue) { 3991 if (nqueue != PQ_ACTIVE) 3992 new.flags |= nflag; 3993 } else { 3994 new.flags |= nflag; 3995 new.queue = nqueue; 3996 } 3997 } while (!vm_page_pqstate_commit(m, &old, new)); 3998 } 3999 4000 /* 4001 * Put the specified page on the active list (if appropriate). 4002 */ 4003 void 4004 vm_page_activate(vm_page_t m) 4005 { 4006 4007 vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE); 4008 } 4009 4010 /* 4011 * Move the specified page to the tail of the inactive queue, or requeue 4012 * the page if it is already in the inactive queue. 4013 */ 4014 void 4015 vm_page_deactivate(vm_page_t m) 4016 { 4017 4018 vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE); 4019 } 4020 4021 void 4022 vm_page_deactivate_noreuse(vm_page_t m) 4023 { 4024 4025 vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD); 4026 } 4027 4028 /* 4029 * Put a page in the laundry, or requeue it if it is already there. 4030 */ 4031 void 4032 vm_page_launder(vm_page_t m) 4033 { 4034 4035 vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE); 4036 } 4037 4038 /* 4039 * Put a page in the PQ_UNSWAPPABLE holding queue. 4040 */ 4041 void 4042 vm_page_unswappable(vm_page_t m) 4043 { 4044 4045 KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0, 4046 ("page %p already unswappable", m)); 4047 4048 vm_page_dequeue(m); 4049 vm_page_enqueue(m, PQ_UNSWAPPABLE); 4050 } 4051 4052 /* 4053 * Release a page back to the page queues in preparation for unwiring. 4054 */ 4055 static void 4056 vm_page_release_toq(vm_page_t m, uint8_t nqueue, const bool noreuse) 4057 { 4058 vm_page_astate_t old, new; 4059 uint16_t nflag; 4060 4061 /* 4062 * Use a check of the valid bits to determine whether we should 4063 * accelerate reclamation of the page. The object lock might not be 4064 * held here, in which case the check is racy. At worst we will either 4065 * accelerate reclamation of a valid page and violate LRU, or 4066 * unnecessarily defer reclamation of an invalid page. 4067 * 4068 * If we were asked not to cache the page, place it near the head of the 4069 * inactive queue so that it is reclaimed sooner. 4070 */ 4071 if (noreuse || m->valid == 0) { 4072 nqueue = PQ_INACTIVE; 4073 nflag = PGA_REQUEUE_HEAD; 4074 } else { 4075 nflag = PGA_REQUEUE; 4076 } 4077 4078 old = vm_page_astate_load(m); 4079 do { 4080 new = old; 4081 4082 /* 4083 * If the page is already in the active queue and we are not 4084 * trying to accelerate reclamation, simply mark it as 4085 * referenced and avoid any queue operations. 4086 */ 4087 new.flags &= ~PGA_QUEUE_OP_MASK; 4088 if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE) 4089 new.flags |= PGA_REFERENCED; 4090 else { 4091 new.flags |= nflag; 4092 new.queue = nqueue; 4093 } 4094 } while (!vm_page_pqstate_commit(m, &old, new)); 4095 } 4096 4097 /* 4098 * Unwire a page and either attempt to free it or re-add it to the page queues.
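 *
 * For illustration (the caller named here is an assumption, not taken
 * from this file): a consumer that is done with a wired page, such as
 * the buffer cache, can pass VPR_TRYFREE to have a clean, unmapped page
 * freed outright, or VPR_NOREUSE to have it placed near the head of the
 * inactive queue so that it is reclaimed sooner; with no flags the page
 * is typically requeued at the tail of the inactive queue instead.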
4099 */ 4100 void 4101 vm_page_release(vm_page_t m, int flags) 4102 { 4103 vm_object_t object; 4104 4105 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4106 ("vm_page_release: page %p is unmanaged", m)); 4107 4108 if ((flags & VPR_TRYFREE) != 0) { 4109 for (;;) { 4110 object = atomic_load_ptr(&m->object); 4111 if (object == NULL) 4112 break; 4113 /* Depends on type-stability. */ 4114 if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) 4115 break; 4116 if (object == m->object) { 4117 vm_page_release_locked(m, flags); 4118 VM_OBJECT_WUNLOCK(object); 4119 return; 4120 } 4121 VM_OBJECT_WUNLOCK(object); 4122 } 4123 } 4124 vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0); 4125 } 4126 4127 /* See vm_page_release(). */ 4128 void 4129 vm_page_release_locked(vm_page_t m, int flags) 4130 { 4131 4132 VM_OBJECT_ASSERT_WLOCKED(m->object); 4133 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4134 ("vm_page_release_locked: page %p is unmanaged", m)); 4135 4136 if (vm_page_unwire_noq(m)) { 4137 if ((flags & VPR_TRYFREE) != 0 && 4138 (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) && 4139 m->dirty == 0 && vm_page_tryxbusy(m)) { 4140 vm_page_free(m); 4141 } else { 4142 vm_page_release_toq(m, PQ_INACTIVE, flags != 0); 4143 } 4144 } 4145 } 4146 4147 static bool 4148 vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) 4149 { 4150 u_int old; 4151 4152 KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, 4153 ("vm_page_try_blocked_op: page %p has no object", m)); 4154 KASSERT(vm_page_busied(m), 4155 ("vm_page_try_blocked_op: page %p is not busy", m)); 4156 VM_OBJECT_ASSERT_LOCKED(m->object); 4157 4158 old = m->ref_count; 4159 do { 4160 KASSERT(old != 0, 4161 ("vm_page_try_blocked_op: page %p has no references", m)); 4162 if (VPRC_WIRE_COUNT(old) != 0) 4163 return (false); 4164 } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); 4165 4166 (op)(m); 4167 4168 /* 4169 * If the object is read-locked, new wirings may be created via an 4170 * object lookup. 4171 */ 4172 old = vm_page_drop(m, VPRC_BLOCKED); 4173 KASSERT(!VM_OBJECT_WOWNED(m->object) || 4174 old == (VPRC_BLOCKED | VPRC_OBJREF), 4175 ("vm_page_try_blocked_op: unexpected refcount value %u for %p", 4176 old, m)); 4177 return (true); 4178 } 4179 4180 /* 4181 * Atomically check for wirings and remove all mappings of the page. 4182 */ 4183 bool 4184 vm_page_try_remove_all(vm_page_t m) 4185 { 4186 4187 return (vm_page_try_blocked_op(m, pmap_remove_all)); 4188 } 4189 4190 /* 4191 * Atomically check for wirings and remove all writeable mappings of the page. 4192 */ 4193 bool 4194 vm_page_try_remove_write(vm_page_t m) 4195 { 4196 4197 return (vm_page_try_blocked_op(m, pmap_remove_write)); 4198 } 4199 4200 /* 4201 * vm_page_advise 4202 * 4203 * Apply the specified advice to the given page. 4204 * 4205 * The object and page must be locked. 4206 */ 4207 void 4208 vm_page_advise(vm_page_t m, int advice) 4209 { 4210 4211 VM_OBJECT_ASSERT_WLOCKED(m->object); 4212 if (advice == MADV_FREE) 4213 /* 4214 * Mark the page clean. This will allow the page to be freed 4215 * without first paging it out. MADV_FREE pages are often 4216 * quickly reused by malloc(3), so we do not do anything that 4217 * would result in a page fault on a later access. 
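 *
 * For illustration, this is the path ultimately reached when a user
 * process calls madvise(2), e.g. madvise(addr, len, MADV_FREE), to
 * tell the kernel that the contents of the range may be discarded
 * lazily rather than paged out.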
4218 */ 4219 vm_page_undirty(m); 4220 else if (advice != MADV_DONTNEED) { 4221 if (advice == MADV_WILLNEED) 4222 vm_page_activate(m); 4223 return; 4224 } 4225 4226 if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) 4227 vm_page_dirty(m); 4228 4229 /* 4230 * Clear any references to the page. Otherwise, the page daemon will 4231 * immediately reactivate the page. 4232 */ 4233 vm_page_aflag_clear(m, PGA_REFERENCED); 4234 4235 /* 4236 * Place clean pages near the head of the inactive queue rather than 4237 * the tail, thus defeating the queue's LRU operation and ensuring that 4238 * the page will be reused quickly. Dirty pages not already in the 4239 * laundry are moved there. 4240 */ 4241 if (m->dirty == 0) 4242 vm_page_deactivate_noreuse(m); 4243 else if (!vm_page_in_laundry(m)) 4244 vm_page_launder(m); 4245 } 4246 4247 static inline int 4248 vm_page_grab_pflags(int allocflags) 4249 { 4250 int pflags; 4251 4252 KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || 4253 (allocflags & VM_ALLOC_WIRED) != 0, 4254 ("vm_page_grab_pflags: the pages must be busied or wired")); 4255 KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || 4256 (allocflags & VM_ALLOC_IGN_SBUSY) != 0, 4257 ("vm_page_grab_pflags: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY " 4258 "mismatch")); 4259 pflags = allocflags & 4260 ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL | 4261 VM_ALLOC_NOBUSY); 4262 if ((allocflags & VM_ALLOC_NOWAIT) == 0) 4263 pflags |= VM_ALLOC_WAITFAIL; 4264 if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) 4265 pflags |= VM_ALLOC_SBUSY; 4266 4267 return (pflags); 4268 } 4269 4270 /* 4271 * Grab a page, waiting until we are woken up due to the page 4272 * changing state. We keep on waiting if the page continues 4273 * to be in the object. If the page doesn't exist, first allocate it 4274 * and then conditionally zero it. 4275 * 4276 * This routine may sleep. 4277 * 4278 * The object must be locked on entry. The lock will, however, be released 4279 * and reacquired if the routine sleeps. 4280 */ 4281 vm_page_t 4282 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) 4283 { 4284 vm_page_t m; 4285 int pflags; 4286 4287 VM_OBJECT_ASSERT_WLOCKED(object); 4288 pflags = vm_page_grab_pflags(allocflags); 4289 retrylookup: 4290 if ((m = vm_page_lookup(object, pindex)) != NULL) { 4291 if (!vm_page_acquire_flags(m, allocflags)) { 4292 if (vm_page_busy_sleep_flags(object, m, "pgrbwt", 4293 allocflags)) 4294 goto retrylookup; 4295 return (NULL); 4296 } 4297 goto out; 4298 } 4299 if ((allocflags & VM_ALLOC_NOCREAT) != 0) 4300 return (NULL); 4301 m = vm_page_alloc(object, pindex, pflags); 4302 if (m == NULL) { 4303 if ((allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0) 4304 return (NULL); 4305 goto retrylookup; 4306 } 4307 if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) 4308 pmap_zero_page(m); 4309 4310 out: 4311 if ((allocflags & VM_ALLOC_NOBUSY) != 0) { 4312 if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) 4313 vm_page_sunbusy(m); 4314 else 4315 vm_page_xunbusy(m); 4316 } 4317 return (m); 4318 } 4319 4320 /* 4321 * Grab a page and make it valid, paging in if necessary. Pages missing from 4322 * their pager are zero filled and validated. If a VM_ALLOC_COUNT is supplied 4323 * and the page is not valid, as many as VM_INITIAL_PAGEIN pages can be brought 4324 * in simultaneously. Additional pages will be left on a paging queue but 4325 * will neither be wired nor busy regardless of allocflags.
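 *
 * A minimal caller sketch (illustrative only; the surrounding code and
 * the EIO error mapping are hypothetical, not taken from an actual
 * consumer):
 *
 *	VM_OBJECT_WLOCK(object);
 *	rv = vm_page_grab_valid(&m, object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
 *	VM_OBJECT_WUNLOCK(object);
 *	if (rv != VM_PAGER_OK)
 *		return (EIO);
 *	... use the exclusive-busied, wired page, then vm_page_xunbusy(m)
 *	and, once the wiring is no longer needed, vm_page_unwire(m,
 *	PQ_ACTIVE) ...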
4326 */ 4327 int 4328 vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags) 4329 { 4330 vm_page_t m; 4331 vm_page_t ma[VM_INITIAL_PAGEIN]; 4332 bool sleep, xbusy; 4333 int after, i, pflags, rv; 4334 4335 KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || 4336 (allocflags & VM_ALLOC_IGN_SBUSY) != 0, 4337 ("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); 4338 KASSERT((allocflags & 4339 (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0, 4340 ("vm_page_grab_valid: Invalid flags 0x%X", allocflags)); 4341 VM_OBJECT_ASSERT_WLOCKED(object); 4342 pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY); 4343 pflags |= VM_ALLOC_WAITFAIL; 4344 4345 retrylookup: 4346 xbusy = false; 4347 if ((m = vm_page_lookup(object, pindex)) != NULL) { 4348 /* 4349 * If the page is fully valid it can only become invalid 4350 * with the object lock held. If it is not valid it can 4351 * become valid with the busy lock held. Therefore, we 4352 * may unnecessarily lock the exclusive busy here if we 4353 * race with I/O completion not using the object lock. 4354 * However, we will not end up with an invalid page and a 4355 * shared lock. 4356 */ 4357 if (!vm_page_all_valid(m) || 4358 (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) { 4359 sleep = !vm_page_tryxbusy(m); 4360 xbusy = true; 4361 } else 4362 sleep = !vm_page_trysbusy(m); 4363 if (sleep) { 4364 (void)vm_page_busy_sleep_flags(object, m, "pgrbwt", 4365 allocflags); 4366 goto retrylookup; 4367 } 4368 if ((allocflags & VM_ALLOC_NOCREAT) != 0 && 4369 !vm_page_all_valid(m)) { 4370 if (xbusy) 4371 vm_page_xunbusy(m); 4372 else 4373 vm_page_sunbusy(m); 4374 *mp = NULL; 4375 return (VM_PAGER_FAIL); 4376 } 4377 if ((allocflags & VM_ALLOC_WIRED) != 0) 4378 vm_page_wire(m); 4379 if (vm_page_all_valid(m)) 4380 goto out; 4381 } else if ((allocflags & VM_ALLOC_NOCREAT) != 0) { 4382 *mp = NULL; 4383 return (VM_PAGER_FAIL); 4384 } else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) { 4385 xbusy = true; 4386 } else { 4387 goto retrylookup; 4388 } 4389 4390 vm_page_assert_xbusied(m); 4391 MPASS(xbusy); 4392 if (vm_pager_has_page(object, pindex, NULL, &after)) { 4393 after = MIN(after, VM_INITIAL_PAGEIN); 4394 after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT); 4395 after = MAX(after, 1); 4396 ma[0] = m; 4397 for (i = 1; i < after; i++) { 4398 if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { 4399 if (ma[i]->valid || !vm_page_tryxbusy(ma[i])) 4400 break; 4401 } else { 4402 ma[i] = vm_page_alloc(object, m->pindex + i, 4403 VM_ALLOC_NORMAL); 4404 if (ma[i] == NULL) 4405 break; 4406 } 4407 } 4408 after = i; 4409 vm_object_pip_add(object, after); 4410 VM_OBJECT_WUNLOCK(object); 4411 rv = vm_pager_get_pages(object, ma, after, NULL, NULL); 4412 VM_OBJECT_WLOCK(object); 4413 vm_object_pip_wakeupn(object, after); 4414 /* Pager may have replaced a page. 
*/ 4415 m = ma[0]; 4416 if (rv != VM_PAGER_OK) { 4417 if ((allocflags & VM_ALLOC_WIRED) != 0) 4418 vm_page_unwire_noq(m); 4419 for (i = 0; i < after; i++) { 4420 if (!vm_page_wired(ma[i])) 4421 vm_page_free(ma[i]); 4422 else 4423 vm_page_xunbusy(ma[i]); 4424 } 4425 *mp = NULL; 4426 return (rv); 4427 } 4428 for (i = 1; i < after; i++) 4429 vm_page_readahead_finish(ma[i]); 4430 MPASS(vm_page_all_valid(m)); 4431 } else { 4432 vm_page_zero_invalid(m, TRUE); 4433 } 4434 out: 4435 if ((allocflags & VM_ALLOC_NOBUSY) != 0) { 4436 if (xbusy) 4437 vm_page_xunbusy(m); 4438 else 4439 vm_page_sunbusy(m); 4440 } 4441 if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy) 4442 vm_page_busy_downgrade(m); 4443 *mp = m; 4444 return (VM_PAGER_OK); 4445 } 4446 4447 /* 4448 * Return the specified range of pages from the given object. For each 4449 * page offset within the range, if a page already exists within the object 4450 * at that offset and it is busy, then wait for it to change state. If, 4451 * instead, the page doesn't exist, then allocate it. 4452 * 4453 * The caller must always specify an allocation class. 4454 * 4455 * allocation classes: 4456 * VM_ALLOC_NORMAL normal process request 4457 * VM_ALLOC_SYSTEM system *really* needs the pages 4458 * 4459 * The caller must always specify that the pages are to be busied and/or 4460 * wired. 4461 * 4462 * optional allocation flags: 4463 * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages 4464 * VM_ALLOC_NOBUSY do not exclusive busy the page 4465 * VM_ALLOC_NOWAIT do not sleep 4466 * VM_ALLOC_SBUSY set page to sbusy state 4467 * VM_ALLOC_WIRED wire the pages 4468 * VM_ALLOC_ZERO zero and validate any invalid pages 4469 * 4470 * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it 4471 * may return a partial prefix of the requested range. 4472 */ 4473 int 4474 vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, 4475 vm_page_t *ma, int count) 4476 { 4477 vm_page_t m, mpred; 4478 int pflags; 4479 int i; 4480 4481 VM_OBJECT_ASSERT_WLOCKED(object); 4482 KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, 4483 ("vm_page_grab_pages: VM_ALLOC_COUNT() is not allowed")); 4484 4485 pflags = vm_page_grab_pflags(allocflags); 4486 if (count == 0) 4487 return (0); 4488 4489 i = 0; 4490 retrylookup: 4491 m = vm_radix_lookup_le(&object->rtree, pindex + i); 4492 if (m == NULL || m->pindex != pindex + i) { 4493 mpred = m; 4494 m = NULL; 4495 } else 4496 mpred = TAILQ_PREV(m, pglist, listq); 4497 for (; i < count; i++) { 4498 if (m != NULL) { 4499 if (!vm_page_acquire_flags(m, allocflags)) { 4500 if (vm_page_busy_sleep_flags(object, m, 4501 "grbmaw", allocflags)) 4502 goto retrylookup; 4503 break; 4504 } 4505 } else { 4506 if ((allocflags & VM_ALLOC_NOCREAT) != 0) 4507 break; 4508 m = vm_page_alloc_after(object, pindex + i, 4509 pflags | VM_ALLOC_COUNT(count - i), mpred); 4510 if (m == NULL) { 4511 if ((allocflags & (VM_ALLOC_NOWAIT | 4512 VM_ALLOC_WAITFAIL)) != 0) 4513 break; 4514 goto retrylookup; 4515 } 4516 } 4517 if (vm_page_none_valid(m) && 4518 (allocflags & VM_ALLOC_ZERO) != 0) { 4519 if ((m->flags & PG_ZERO) == 0) 4520 pmap_zero_page(m); 4521 vm_page_valid(m); 4522 } 4523 if ((allocflags & VM_ALLOC_NOBUSY) != 0) { 4524 if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) 4525 vm_page_sunbusy(m); 4526 else 4527 vm_page_xunbusy(m); 4528 } 4529 ma[i] = mpred = m; 4530 m = vm_page_next(m); 4531 } 4532 return (i); 4533 } 4534 4535 /* 4536 * Mapping function for valid or dirty bits in a page.
4537 * 4538 * Inputs are required to range within a page. 4539 */ 4540 vm_page_bits_t 4541 vm_page_bits(int base, int size) 4542 { 4543 int first_bit; 4544 int last_bit; 4545 4546 KASSERT( 4547 base + size <= PAGE_SIZE, 4548 ("vm_page_bits: illegal base/size %d/%d", base, size) 4549 ); 4550 4551 if (size == 0) /* handle degenerate case */ 4552 return (0); 4553 4554 first_bit = base >> DEV_BSHIFT; 4555 last_bit = (base + size - 1) >> DEV_BSHIFT; 4556 4557 return (((vm_page_bits_t)2 << last_bit) - 4558 ((vm_page_bits_t)1 << first_bit)); 4559 } 4560 4561 void 4562 vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set) 4563 { 4564 4565 #if PAGE_SIZE == 32768 4566 atomic_set_64((uint64_t *)bits, set); 4567 #elif PAGE_SIZE == 16384 4568 atomic_set_32((uint32_t *)bits, set); 4569 #elif (PAGE_SIZE == 8192) && defined(atomic_set_16) 4570 atomic_set_16((uint16_t *)bits, set); 4571 #elif (PAGE_SIZE == 4096) && defined(atomic_set_8) 4572 atomic_set_8((uint8_t *)bits, set); 4573 #else /* PAGE_SIZE <= 8192 */ 4574 uintptr_t addr; 4575 int shift; 4576 4577 addr = (uintptr_t)bits; 4578 /* 4579 * Use a trick to perform a 32-bit atomic on the 4580 * containing aligned word, to not depend on the existence 4581 * of atomic_{set, clear}_{8, 16}. 4582 */ 4583 shift = addr & (sizeof(uint32_t) - 1); 4584 #if BYTE_ORDER == BIG_ENDIAN 4585 shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; 4586 #else 4587 shift *= NBBY; 4588 #endif 4589 addr &= ~(sizeof(uint32_t) - 1); 4590 atomic_set_32((uint32_t *)addr, set << shift); 4591 #endif /* PAGE_SIZE */ 4592 } 4593 4594 static inline void 4595 vm_page_bits_clear(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t clear) 4596 { 4597 4598 #if PAGE_SIZE == 32768 4599 atomic_clear_64((uint64_t *)bits, clear); 4600 #elif PAGE_SIZE == 16384 4601 atomic_clear_32((uint32_t *)bits, clear); 4602 #elif (PAGE_SIZE == 8192) && defined(atomic_clear_16) 4603 atomic_clear_16((uint16_t *)bits, clear); 4604 #elif (PAGE_SIZE == 4096) && defined(atomic_clear_8) 4605 atomic_clear_8((uint8_t *)bits, clear); 4606 #else /* PAGE_SIZE <= 8192 */ 4607 uintptr_t addr; 4608 int shift; 4609 4610 addr = (uintptr_t)bits; 4611 /* 4612 * Use a trick to perform a 32-bit atomic on the 4613 * containing aligned word, to not depend on the existence 4614 * of atomic_{set, clear}_{8, 16}. 
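 *
 * Worked example (assuming PAGE_SIZE == 4096, so that vm_page_bits_t is
 * a single byte): if "bits" lives at byte offset 3 of its aligned
 * 32-bit word, then shift is 3 * NBBY = 24 on a little-endian machine
 * and (4 - 1 - 3) * NBBY = 0 on a big-endian one, so the atomic below
 * operates on exactly the 8 bits of the containing word that back the
 * field.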
*/ 4615 shift = addr & (sizeof(uint32_t) - 1); 4616 #if BYTE_ORDER == BIG_ENDIAN 4617 shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; 4618 #else 4619 shift *= NBBY; 4620 #endif 4621 addr &= ~(sizeof(uint32_t) - 1); 4622 atomic_clear_32((uint32_t *)addr, clear << shift); 4623 #endif /* PAGE_SIZE */ 4624 } 4625 4626 static inline vm_page_bits_t 4627 vm_page_bits_swap(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t newbits) 4628 { 4629 #if PAGE_SIZE == 32768 4630 uint64_t old; 4631 4632 old = *bits; 4633 while (atomic_fcmpset_64(bits, &old, newbits) == 0); 4634 return (old); 4635 #elif PAGE_SIZE == 16384 4636 uint32_t old; 4637 4638 old = *bits; 4639 while (atomic_fcmpset_32(bits, &old, newbits) == 0); 4640 return (old); 4641 #elif (PAGE_SIZE == 8192) && defined(atomic_fcmpset_16) 4642 uint16_t old; 4643 4644 old = *bits; 4645 while (atomic_fcmpset_16(bits, &old, newbits) == 0); 4646 return (old); 4647 #elif (PAGE_SIZE == 4096) && defined(atomic_fcmpset_8) 4648 uint8_t old; 4649 4650 old = *bits; 4651 while (atomic_fcmpset_8(bits, &old, newbits) == 0); 4652 return (old); 4653 #else /* PAGE_SIZE <= 4096 */ 4654 uintptr_t addr; 4655 uint32_t old, new, mask; 4656 int shift; 4657 4658 addr = (uintptr_t)bits; 4659 /* 4660 * Use a trick to perform a 32-bit atomic on the 4661 * containing aligned word, to not depend on the existence 4662 * of atomic_{set, swap, clear}_{8, 16}. 4663 */ 4664 shift = addr & (sizeof(uint32_t) - 1); 4665 #if BYTE_ORDER == BIG_ENDIAN 4666 shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; 4667 #else 4668 shift *= NBBY; 4669 #endif 4670 addr &= ~(sizeof(uint32_t) - 1); 4671 mask = VM_PAGE_BITS_ALL << shift; 4672 4673 old = *bits; 4674 do { 4675 new = old & ~mask; 4676 new |= newbits << shift; 4677 } while (atomic_fcmpset_32((uint32_t *)addr, &old, new) == 0); 4678 return (old >> shift); 4679 #endif /* PAGE_SIZE */ 4680 } 4681 4682 /* 4683 * vm_page_set_valid_range: 4684 * 4685 * Sets portions of a page valid. The arguments are expected 4686 * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive 4687 * of any partial chunks touched by the range. The invalid portion of 4688 * such chunks will be zeroed. 4689 * 4690 * (base + size) must be less than or equal to PAGE_SIZE. 4691 */ 4692 void 4693 vm_page_set_valid_range(vm_page_t m, int base, int size) 4694 { 4695 int endoff, frag; 4696 vm_page_bits_t pagebits; 4697 4698 vm_page_assert_busied(m); 4699 if (size == 0) /* handle degenerate case */ 4700 return; 4701 4702 /* 4703 * If the base is not DEV_BSIZE aligned and the valid 4704 * bit is clear, we have to zero out a portion of the 4705 * first block. 4706 */ 4707 if ((frag = rounddown2(base, DEV_BSIZE)) != base && 4708 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) 4709 pmap_zero_page_area(m, frag, base - frag); 4710 4711 /* 4712 * If the ending offset is not DEV_BSIZE aligned and the 4713 * valid bit is clear, we have to zero out a portion of 4714 * the last block. 4715 */ 4716 endoff = base + size; 4717 if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && 4718 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) 4719 pmap_zero_page_area(m, endoff, 4720 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 4721 4722 /* 4723 * Assert that no previously invalid block that is now being validated 4724 * is already dirty. 4725 */ 4726 KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, 4727 ("vm_page_set_valid_range: page %p is dirty", m)); 4728 4729 /* 4730 * Set valid bits inclusive of any overlap.
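 *
 * For example, with DEV_BSIZE == 512, vm_page_bits(100, 500) covers the
 * byte range [100, 600) and therefore yields bits 0 and 1 (0x3): both
 * partially touched 512-byte blocks are marked valid, which is why the
 * code above zeroes the untouched head of block 0 and tail of block 1
 * when those blocks were previously invalid.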
4732 */ 4733 pagebits = vm_page_bits(base, size); 4734 if (vm_page_xbusied(m)) 4735 m->valid |= pagebits; 4736 else 4737 vm_page_bits_set(m, &m->valid, pagebits); 4738 } 4739 4740 /* 4741 * Set the page dirty bits and free the invalid swap space if 4742 * present. Returns the previous dirty bits. 4743 */ 4744 vm_page_bits_t 4745 vm_page_set_dirty(vm_page_t m) 4746 { 4747 vm_page_bits_t old; 4748 4749 VM_PAGE_OBJECT_BUSY_ASSERT(m); 4750 4751 if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) { 4752 old = m->dirty; 4753 m->dirty = VM_PAGE_BITS_ALL; 4754 } else 4755 old = vm_page_bits_swap(m, &m->dirty, VM_PAGE_BITS_ALL); 4756 if (old == 0 && (m->a.flags & PGA_SWAP_SPACE) != 0) 4757 vm_pager_page_unswapped(m); 4758 4759 return (old); 4760 } 4761 4762 /* 4763 * Clear the given bits from the specified page's dirty field. 4764 */ 4765 static __inline void 4766 vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) 4767 { 4768 4769 vm_page_assert_busied(m); 4770 4771 /* 4772 * If the page is xbusied and not write mapped we are the 4773 * only thread that can modify dirty bits. Otherwise, The pmap 4774 * layer can call vm_page_dirty() without holding a distinguished 4775 * lock. The combination of page busy and atomic operations 4776 * suffice to guarantee consistency of the page dirty field. 4777 */ 4778 if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) 4779 m->dirty &= ~pagebits; 4780 else 4781 vm_page_bits_clear(m, &m->dirty, pagebits); 4782 } 4783 4784 /* 4785 * vm_page_set_validclean: 4786 * 4787 * Sets portions of a page valid and clean. The arguments are expected 4788 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 4789 * of any partial chunks touched by the range. The invalid portion of 4790 * such chunks will be zero'd. 4791 * 4792 * (base + size) must be less then or equal to PAGE_SIZE. 4793 */ 4794 void 4795 vm_page_set_validclean(vm_page_t m, int base, int size) 4796 { 4797 vm_page_bits_t oldvalid, pagebits; 4798 int endoff, frag; 4799 4800 vm_page_assert_busied(m); 4801 if (size == 0) /* handle degenerate case */ 4802 return; 4803 4804 /* 4805 * If the base is not DEV_BSIZE aligned and the valid 4806 * bit is clear, we have to zero out a portion of the 4807 * first block. 4808 */ 4809 if ((frag = rounddown2(base, DEV_BSIZE)) != base && 4810 (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) 4811 pmap_zero_page_area(m, frag, base - frag); 4812 4813 /* 4814 * If the ending offset is not DEV_BSIZE aligned and the 4815 * valid bit is clear, we have to zero out a portion of 4816 * the last block. 4817 */ 4818 endoff = base + size; 4819 if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && 4820 (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) 4821 pmap_zero_page_area(m, endoff, 4822 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 4823 4824 /* 4825 * Set valid, clear dirty bits. If validating the entire 4826 * page we can safely clear the pmap modify bit. We also 4827 * use this opportunity to clear the PGA_NOSYNC flag. If a process 4828 * takes a write fault on a MAP_NOSYNC memory area the flag will 4829 * be set again. 4830 * 4831 * We set valid bits inclusive of any overlap, but we can only 4832 * clear dirty bits for DEV_BSIZE chunks that are fully within 4833 * the range. 
4834 */ 4835 oldvalid = m->valid; 4836 pagebits = vm_page_bits(base, size); 4837 if (vm_page_xbusied(m)) 4838 m->valid |= pagebits; 4839 else 4840 vm_page_bits_set(m, &m->valid, pagebits); 4841 #if 0 /* NOT YET */ 4842 if ((frag = base & (DEV_BSIZE - 1)) != 0) { 4843 frag = DEV_BSIZE - frag; 4844 base += frag; 4845 size -= frag; 4846 if (size < 0) 4847 size = 0; 4848 } 4849 pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); 4850 #endif 4851 if (base == 0 && size == PAGE_SIZE) { 4852 /* 4853 * The page can only be modified within the pmap if it is 4854 * mapped, and it can only be mapped if it was previously 4855 * fully valid. 4856 */ 4857 if (oldvalid == VM_PAGE_BITS_ALL) 4858 /* 4859 * Perform the pmap_clear_modify() first. Otherwise, 4860 * a concurrent pmap operation, such as 4861 * pmap_protect(), could clear a modification in the 4862 * pmap and set the dirty field on the page before 4863 * pmap_clear_modify() had begun and after the dirty 4864 * field was cleared here. 4865 */ 4866 pmap_clear_modify(m); 4867 m->dirty = 0; 4868 vm_page_aflag_clear(m, PGA_NOSYNC); 4869 } else if (oldvalid != VM_PAGE_BITS_ALL && vm_page_xbusied(m)) 4870 m->dirty &= ~pagebits; 4871 else 4872 vm_page_clear_dirty_mask(m, pagebits); 4873 } 4874 4875 void 4876 vm_page_clear_dirty(vm_page_t m, int base, int size) 4877 { 4878 4879 vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); 4880 } 4881 4882 /* 4883 * vm_page_set_invalid: 4884 * 4885 * Invalidates DEV_BSIZE'd chunks within a page. Both the 4886 * valid and dirty bits for the effected areas are cleared. 4887 */ 4888 void 4889 vm_page_set_invalid(vm_page_t m, int base, int size) 4890 { 4891 vm_page_bits_t bits; 4892 vm_object_t object; 4893 4894 /* 4895 * The object lock is required so that pages can't be mapped 4896 * read-only while we're in the process of invalidating them. 4897 */ 4898 object = m->object; 4899 VM_OBJECT_ASSERT_WLOCKED(object); 4900 vm_page_assert_busied(m); 4901 4902 if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + 4903 size >= object->un_pager.vnp.vnp_size) 4904 bits = VM_PAGE_BITS_ALL; 4905 else 4906 bits = vm_page_bits(base, size); 4907 if (object->ref_count != 0 && vm_page_all_valid(m) && bits != 0) 4908 pmap_remove_all(m); 4909 KASSERT((bits == 0 && vm_page_all_valid(m)) || 4910 !pmap_page_is_mapped(m), 4911 ("vm_page_set_invalid: page %p is mapped", m)); 4912 if (vm_page_xbusied(m)) { 4913 m->valid &= ~bits; 4914 m->dirty &= ~bits; 4915 } else { 4916 vm_page_bits_clear(m, &m->valid, bits); 4917 vm_page_bits_clear(m, &m->dirty, bits); 4918 } 4919 } 4920 4921 /* 4922 * vm_page_invalid: 4923 * 4924 * Invalidates the entire page. The page must be busy, unmapped, and 4925 * the enclosing object must be locked. The object locks protects 4926 * against concurrent read-only pmap enter which is done without 4927 * busy. 4928 */ 4929 void 4930 vm_page_invalid(vm_page_t m) 4931 { 4932 4933 vm_page_assert_busied(m); 4934 VM_OBJECT_ASSERT_LOCKED(m->object); 4935 MPASS(!pmap_page_is_mapped(m)); 4936 4937 if (vm_page_xbusied(m)) 4938 m->valid = 0; 4939 else 4940 vm_page_bits_clear(m, &m->valid, VM_PAGE_BITS_ALL); 4941 } 4942 4943 /* 4944 * vm_page_zero_invalid() 4945 * 4946 * The kernel assumes that the invalid portions of a page contain 4947 * garbage, but such pages can be mapped into memory by user code. 4948 * When this occurs, we must zero out the non-valid portions of the 4949 * page so user code sees what it expects. 
4950 * 4951 * Pages are most often semi-valid when the end of a file is mapped 4952 * into memory and the file's size is not page aligned. 4953 */ 4954 void 4955 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 4956 { 4957 int b; 4958 int i; 4959 4960 /* 4961 * Scan the valid bits looking for invalid sections that 4962 * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the 4963 * valid bit may be set) have already been zeroed by 4964 * vm_page_set_validclean(). 4965 */ 4966 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 4967 if (i == (PAGE_SIZE / DEV_BSIZE) || 4968 (m->valid & ((vm_page_bits_t)1 << i))) { 4969 if (i > b) { 4970 pmap_zero_page_area(m, 4971 b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); 4972 } 4973 b = i + 1; 4974 } 4975 } 4976 4977 /* 4978 * setvalid is TRUE when we can safely set the zeroed areas 4979 * as being valid. We can do this if there are no cache consistency 4980 * issues, e.g., it is ok to do with UFS, but not ok to do with NFS. 4981 */ 4982 if (setvalid) 4983 vm_page_valid(m); 4984 } 4985 4986 /* 4987 * vm_page_is_valid: 4988 * 4989 * Is (partial) page valid? Note that for size == 0 this returns 4990 * FALSE in the degenerate case where the page is entirely invalid, 4991 * and TRUE otherwise. 4992 * 4993 * Some callers invoke this routine without the busy lock held and 4994 * handle races via higher-level locks. Typical callers should 4995 * hold a busy lock to prevent invalidation. 4996 */ 4997 int 4998 vm_page_is_valid(vm_page_t m, int base, int size) 4999 { 5000 vm_page_bits_t bits; 5001 5002 bits = vm_page_bits(base, size); 5003 return (m->valid != 0 && (m->valid & bits) == bits); 5004 } 5005 5006 /* 5007 * Returns true if all of the specified predicates are true for the entire 5008 * (super)page and false otherwise. 5009 */ 5010 bool 5011 vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) 5012 { 5013 vm_object_t object; 5014 int i, npages; 5015 5016 object = m->object; 5017 if (skip_m != NULL && skip_m->object != object) 5018 return (false); 5019 VM_OBJECT_ASSERT_LOCKED(object); 5020 npages = atop(pagesizes[m->psind]); 5021 5022 /* 5023 * The physically contiguous pages that make up a superpage, i.e., a 5024 * page with a page size index ("psind") greater than zero, will 5025 * occupy adjacent entries in vm_page_array[]. 5026 */ 5027 for (i = 0; i < npages; i++) { 5028 /* Always test object consistency, including "skip_m". */ 5029 if (m[i].object != object) 5030 return (false); 5031 if (&m[i] == skip_m) 5032 continue; 5033 if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) 5034 return (false); 5035 if ((flags & PS_ALL_DIRTY) != 0) { 5036 /* 5037 * Calling vm_page_test_dirty() or pmap_is_modified() 5038 * might stop this case from spuriously returning 5039 * "false". However, that would require a write lock 5040 * on the object containing "m[i]". 5041 */ 5042 if (m[i].dirty != VM_PAGE_BITS_ALL) 5043 return (false); 5044 } 5045 if ((flags & PS_ALL_VALID) != 0 && 5046 m[i].valid != VM_PAGE_BITS_ALL) 5047 return (false); 5048 } 5049 return (true); 5050 } 5051 5052 /* 5053 * Set the page's dirty bits if the page is modified.
5054 */ 5055 void 5056 vm_page_test_dirty(vm_page_t m) 5057 { 5058 5059 vm_page_assert_busied(m); 5060 if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) 5061 vm_page_dirty(m); 5062 } 5063 5064 void 5065 vm_page_valid(vm_page_t m) 5066 { 5067 5068 vm_page_assert_busied(m); 5069 if (vm_page_xbusied(m)) 5070 m->valid = VM_PAGE_BITS_ALL; 5071 else 5072 vm_page_bits_set(m, &m->valid, VM_PAGE_BITS_ALL); 5073 } 5074 5075 void 5076 vm_page_lock_KBI(vm_page_t m, const char *file, int line) 5077 { 5078 5079 mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); 5080 } 5081 5082 void 5083 vm_page_unlock_KBI(vm_page_t m, const char *file, int line) 5084 { 5085 5086 mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); 5087 } 5088 5089 int 5090 vm_page_trylock_KBI(vm_page_t m, const char *file, int line) 5091 { 5092 5093 return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); 5094 } 5095 5096 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) 5097 void 5098 vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) 5099 { 5100 5101 vm_page_lock_assert_KBI(m, MA_OWNED, file, line); 5102 } 5103 5104 void 5105 vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) 5106 { 5107 5108 mtx_assert_(vm_page_lockptr(m), a, file, line); 5109 } 5110 #endif 5111 5112 #ifdef INVARIANTS 5113 void 5114 vm_page_object_busy_assert(vm_page_t m) 5115 { 5116 5117 /* 5118 * Certain of the page's fields may only be modified by the 5119 * holder of a page or object busy. 5120 */ 5121 if (m->object != NULL && !vm_page_busied(m)) 5122 VM_OBJECT_ASSERT_BUSY(m->object); 5123 } 5124 5125 void 5126 vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits) 5127 { 5128 5129 if ((bits & PGA_WRITEABLE) == 0) 5130 return; 5131 5132 /* 5133 * The PGA_WRITEABLE flag can only be set if the page is 5134 * managed, is exclusively busied or the object is locked. 5135 * Currently, this flag is only set by pmap_enter(). 
5136 */ 5137 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5138 ("PGA_WRITEABLE on unmanaged page")); 5139 if (!vm_page_xbusied(m)) 5140 VM_OBJECT_ASSERT_BUSY(m->object); 5141 } 5142 #endif 5143 5144 #include "opt_ddb.h" 5145 #ifdef DDB 5146 #include <sys/kernel.h> 5147 5148 #include <ddb/ddb.h> 5149 5150 DB_SHOW_COMMAND(page, vm_page_print_page_info) 5151 { 5152 5153 db_printf("vm_cnt.v_free_count: %d\n", vm_free_count()); 5154 db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count()); 5155 db_printf("vm_cnt.v_active_count: %d\n", vm_active_count()); 5156 db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count()); 5157 db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count()); 5158 db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); 5159 db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); 5160 db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); 5161 db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); 5162 } 5163 5164 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 5165 { 5166 int dom; 5167 5168 db_printf("pq_free %d\n", vm_free_count()); 5169 for (dom = 0; dom < vm_ndomains; dom++) { 5170 db_printf( 5171 "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", 5172 dom, 5173 vm_dom[dom].vmd_page_count, 5174 vm_dom[dom].vmd_free_count, 5175 vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, 5176 vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, 5177 vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, 5178 vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); 5179 } 5180 } 5181 5182 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) 5183 { 5184 vm_page_t m; 5185 boolean_t phys, virt; 5186 5187 if (!have_addr) { 5188 db_printf("show pginfo addr\n"); 5189 return; 5190 } 5191 5192 phys = strchr(modif, 'p') != NULL; 5193 virt = strchr(modif, 'v') != NULL; 5194 if (virt) 5195 m = PHYS_TO_VM_PAGE(pmap_kextract(addr)); 5196 else if (phys) 5197 m = PHYS_TO_VM_PAGE(addr); 5198 else 5199 m = (vm_page_t)addr; 5200 db_printf( 5201 "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" 5202 " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", 5203 m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, 5204 m->a.queue, m->ref_count, m->a.flags, m->oflags, 5205 m->flags, m->a.act_count, m->busy_lock, m->valid, m->dirty); 5206 } 5207 #endif /* DDB */ 5208