/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
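 *
 * The allocator is a buddy allocator: free physical pages are kept on
 * per-domain queues indexed by free list, pool, and order, where the order is
 * the base-2 logarithm of a block's size in pages.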
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
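 *
 * The resulting vm.phys_free sysctl output contains one table per domain and
 * free list, with one row per order and one column per pool.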
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
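 *
 * One row is printed per domain; each entry is vm_phys_mem_affinity(i, j),
 * or -1 when no locality information is available.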
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
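 *
 * Computes the number of free lists, initializes each segment's first_page
 * and free_queues fields, coalesces adjacent segments that share a set of
 * free queues, and finally initializes the free queues themselves.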
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
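	 * There is one tail queue for each (domain, free list, pool, order)
	 * tuple.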
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
    int *locality)
{
#ifdef NUMA
	int d, i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	d = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &d);
	if (d)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);
#else
	(void)ndomains;
	(void)affinity;
	(void)locality;
#endif
}

int
_vm_phys_domain(vm_paddr_t pa)
{
#ifdef NUMA
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	/*
	 * Check for any memory that overlaps.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= pa &&
		    mem_affinity[i].end >= pa)
			return (mem_affinity[i].domain);
#endif
	return (0);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
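 *
 * Note that the blocks are enqueued in order of strictly increasing size,
 * beginning with the block whose size corresponds to the lowest set bit of
 * npages.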
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
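 *
 * Returns NULL if the request cannot be satisfied from any free list in the
 * specified domain.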
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
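 *
 * Returns NULL if the address is not covered by any physical memory segment.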
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only; there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg == NULL || seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
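 *
 * Where possible, the freed set is merged with its buddies to form larger
 * free blocks before being placed on a free list.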
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Return the largest possible order of a set of pages starting at m.
 */
static int
max_order(vm_page_t m)
{

	/*
	 * Unsigned "min" is used here so that "order" is assigned
	 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
	 * or the low-order bits of its physical address are zero
	 * because the size of a physical address exceeds the size of
	 * a long.
	 */
	return (min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
	    VM_NFREEORDER - 1));
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages, without
 * merging across set boundaries.
 *
 * The free page queues must be locked.
 */
void
vm_phys_enqueue_contig(vm_page_t m, u_long npages)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_page_t m_end;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	seg = &vm_phys_segs[m->segind];
	fl = (*seg->free_queues)[m->pool];
	m_end = m + npages;
	/* Free blocks of increasing size. */
	while ((order = max_order(m)) < VM_NFREEORDER - 1 &&
	    m + (1 << order) <= m_end) {
		KASSERT(seg == &vm_phys_segs[m->segind],
		    ("%s: page range [%p,%p) spans multiple segments",
		    __func__, m_end - npages, m));
		vm_freelist_add(fl, m, order, 1);
		m += 1 << order;
	}
	/* Free blocks of maximum size. */
	while (m + (1 << order) <= m_end) {
		KASSERT(seg == &vm_phys_segs[m->segind],
		    ("%s: page range [%p,%p) spans multiple segments",
		    __func__, m_end - npages, m));
		vm_freelist_add(fl, m, order, 1);
		m += 1 << order;
	}
	/* Free blocks of diminishing size. */
	while (m < m_end) {
		KASSERT(seg == &vm_phys_segs[m->segind],
		    ("%s: page range [%p,%p) spans multiple segments",
		    __func__, m_end - npages, m));
		order = flsl(m_end - m) - 1;
		vm_freelist_add(fl, m, order, 1);
		m += 1 << order;
	}
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
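 *
 * Unlike vm_phys_enqueue_contig(), the partially aligned blocks at the start
 * and end of the range are freed with vm_phys_free_pages() so that they may
 * coalesce with adjacent free blocks.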
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	int order_start, order_end;
	vm_page_t m_start, m_end;

	vm_domain_free_assert_locked(vm_pagequeue_domain(m));

	m_start = m;
	order_start = max_order(m_start);
	if (order_start < VM_NFREEORDER - 1)
		m_start += 1 << order_start;
	m_end = m + npages;
	order_end = max_order(m_end);
	if (order_end < VM_NFREEORDER - 1)
		m_end -= 1 << order_end;
	/*
	 * Avoid unnecessary coalescing by freeing the pages at the start and
	 * end of the range last.
	 */
	if (m_start < m_end)
		vm_phys_enqueue_contig(m_start, m_end - m_start);
	if (order_start < VM_NFREEORDER - 1)
		vm_phys_free_pages(m, order_start);
	if (order_end < VM_NFREEORDER - 1)
		vm_phys_free_pages(m_end, order_end);
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
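 *
 * For example (illustrative values only), a caller holding the domain's free
 * queue lock might request 16 pages below 4 GiB, aligned to 64 KiB:
 *
 *	m = vm_phys_alloc_contig(domain, 16, 0, (vm_paddr_t)1 << 32,
 *	    64 * 1024, 0);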
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
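				 * The boundary test below succeeds only when
				 * "pa" and "pa_end - 1" round down to the same
				 * multiple of "boundary".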
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif