1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002-2006 Rice University 5 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 6 * All rights reserved. 7 * 8 * This software was developed for the FreeBSD Project by Alan L. Cox, 9 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Physical memory system implementation 36 * 37 * Any external functions defined by this module are only to be used by the 38 * virtual memory system. 
39 */ 40 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 #include "opt_ddb.h" 45 #include "opt_vm.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/domainset.h> 50 #include <sys/lock.h> 51 #include <sys/kernel.h> 52 #include <sys/malloc.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/queue.h> 56 #include <sys/rwlock.h> 57 #include <sys/sbuf.h> 58 #include <sys/sysctl.h> 59 #include <sys/tree.h> 60 #include <sys/vmmeter.h> 61 62 #include <ddb/ddb.h> 63 64 #include <vm/vm.h> 65 #include <vm/vm_param.h> 66 #include <vm/vm_kern.h> 67 #include <vm/vm_object.h> 68 #include <vm/vm_page.h> 69 #include <vm/vm_phys.h> 70 #include <vm/vm_pagequeue.h> 71 72 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, 73 "Too many physsegs."); 74 75 #ifdef NUMA 76 struct mem_affinity __read_mostly *mem_affinity; 77 int __read_mostly *mem_locality; 78 #endif 79 80 int __read_mostly vm_ndomains = 1; 81 domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1); 82 83 struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX]; 84 int __read_mostly vm_phys_nsegs; 85 86 struct vm_phys_fictitious_seg; 87 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, 88 struct vm_phys_fictitious_seg *); 89 90 RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = 91 RB_INITIALIZER(_vm_phys_fictitious_tree); 92 93 struct vm_phys_fictitious_seg { 94 RB_ENTRY(vm_phys_fictitious_seg) node; 95 /* Memory region data */ 96 vm_paddr_t start; 97 vm_paddr_t end; 98 vm_page_t first_page; 99 }; 100 101 RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, 102 vm_phys_fictitious_cmp); 103 104 static struct rwlock_padalign vm_phys_fictitious_reg_lock; 105 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); 106 107 static struct vm_freelist __aligned(CACHE_LINE_SIZE) 108 vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL] 109 [VM_NFREEORDER_MAX]; 110 111 static int __read_mostly 
vm_nfreelists; 112 113 /* 114 * Provides the mapping from VM_FREELIST_* to free list indices (flind). 115 */ 116 static int __read_mostly vm_freelist_to_flind[VM_NFREELIST]; 117 118 CTASSERT(VM_FREELIST_DEFAULT == 0); 119 120 #ifdef VM_FREELIST_DMA32 121 #define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) 122 #endif 123 124 /* 125 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about 126 * the ordering of the free list boundaries. 127 */ 128 #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) 129 CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); 130 #endif 131 132 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 133 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 134 NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 135 136 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 137 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 138 NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 139 140 #ifdef NUMA 141 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS); 142 SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD, 143 NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info"); 144 #endif 145 146 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, 147 &vm_ndomains, 0, "Number of physical memory domains available."); 148 149 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, 150 u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, 151 vm_paddr_t boundary); 152 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); 153 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); 154 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 155 int order, int tail); 156 157 /* 158 * Red-black tree helpers for vm fictitious range management. 
159 */ 160 static inline int 161 vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, 162 struct vm_phys_fictitious_seg *range) 163 { 164 165 KASSERT(range->start != 0 && range->end != 0, 166 ("Invalid range passed on search for vm_fictitious page")); 167 if (p->start >= range->end) 168 return (1); 169 if (p->start < range->start) 170 return (-1); 171 172 return (0); 173 } 174 175 static int 176 vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, 177 struct vm_phys_fictitious_seg *p2) 178 { 179 180 /* Check if this is a search for a page */ 181 if (p1->end == 0) 182 return (vm_phys_fictitious_in_range(p1, p2)); 183 184 KASSERT(p2->end != 0, 185 ("Invalid range passed as second parameter to vm fictitious comparison")); 186 187 /* Searching to add a new range */ 188 if (p1->end <= p2->start) 189 return (-1); 190 if (p1->start >= p2->end) 191 return (1); 192 193 panic("Trying to add overlapping vm fictitious ranges:\n" 194 "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start, 195 (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); 196 } 197 198 int 199 vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high) 200 { 201 #ifdef NUMA 202 domainset_t mask; 203 int i; 204 205 if (vm_ndomains == 1 || mem_affinity == NULL) 206 return (0); 207 208 DOMAINSET_ZERO(&mask); 209 /* 210 * Check for any memory that overlaps low, high. 211 */ 212 for (i = 0; mem_affinity[i].end != 0; i++) 213 if (mem_affinity[i].start <= high && 214 mem_affinity[i].end >= low) 215 DOMAINSET_SET(mem_affinity[i].domain, &mask); 216 if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask)) 217 return (prefer); 218 if (DOMAINSET_EMPTY(&mask)) 219 panic("vm_phys_domain_match: Impossible constraint"); 220 return (DOMAINSET_FFS(&mask) - 1); 221 #else 222 return (0); 223 #endif 224 } 225 226 /* 227 * Outputs the state of the physical memory allocator, specifically, 228 * the amount of physical memory in each free list. 
229 */ 230 static int 231 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 232 { 233 struct sbuf sbuf; 234 struct vm_freelist *fl; 235 int dom, error, flind, oind, pind; 236 237 error = sysctl_wire_old_buffer(req, 0); 238 if (error != 0) 239 return (error); 240 sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); 241 for (dom = 0; dom < vm_ndomains; dom++) { 242 sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); 243 for (flind = 0; flind < vm_nfreelists; flind++) { 244 sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 245 "\n ORDER (SIZE) | NUMBER" 246 "\n ", flind); 247 for (pind = 0; pind < VM_NFREEPOOL; pind++) 248 sbuf_printf(&sbuf, " | POOL %d", pind); 249 sbuf_printf(&sbuf, "\n-- "); 250 for (pind = 0; pind < VM_NFREEPOOL; pind++) 251 sbuf_printf(&sbuf, "-- -- "); 252 sbuf_printf(&sbuf, "--\n"); 253 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 254 sbuf_printf(&sbuf, " %2d (%6dK)", oind, 255 1 << (PAGE_SHIFT - 10 + oind)); 256 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 257 fl = vm_phys_free_queues[dom][flind][pind]; 258 sbuf_printf(&sbuf, " | %6d", 259 fl[oind].lcnt); 260 } 261 sbuf_printf(&sbuf, "\n"); 262 } 263 } 264 } 265 error = sbuf_finish(&sbuf); 266 sbuf_delete(&sbuf); 267 return (error); 268 } 269 270 /* 271 * Outputs the set of physical memory segments. 
272 */ 273 static int 274 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 275 { 276 struct sbuf sbuf; 277 struct vm_phys_seg *seg; 278 int error, segind; 279 280 error = sysctl_wire_old_buffer(req, 0); 281 if (error != 0) 282 return (error); 283 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 284 for (segind = 0; segind < vm_phys_nsegs; segind++) { 285 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 286 seg = &vm_phys_segs[segind]; 287 sbuf_printf(&sbuf, "start: %#jx\n", 288 (uintmax_t)seg->start); 289 sbuf_printf(&sbuf, "end: %#jx\n", 290 (uintmax_t)seg->end); 291 sbuf_printf(&sbuf, "domain: %d\n", seg->domain); 292 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 293 } 294 error = sbuf_finish(&sbuf); 295 sbuf_delete(&sbuf); 296 return (error); 297 } 298 299 /* 300 * Return affinity, or -1 if there's no affinity information. 301 */ 302 int 303 vm_phys_mem_affinity(int f, int t) 304 { 305 306 #ifdef NUMA 307 if (mem_locality == NULL) 308 return (-1); 309 if (f >= vm_ndomains || t >= vm_ndomains) 310 return (-1); 311 return (mem_locality[f * vm_ndomains + t]); 312 #else 313 return (-1); 314 #endif 315 } 316 317 #ifdef NUMA 318 /* 319 * Outputs the VM locality table. 
320 */ 321 static int 322 sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS) 323 { 324 struct sbuf sbuf; 325 int error, i, j; 326 327 error = sysctl_wire_old_buffer(req, 0); 328 if (error != 0) 329 return (error); 330 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 331 332 sbuf_printf(&sbuf, "\n"); 333 334 for (i = 0; i < vm_ndomains; i++) { 335 sbuf_printf(&sbuf, "%d: ", i); 336 for (j = 0; j < vm_ndomains; j++) { 337 sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j)); 338 } 339 sbuf_printf(&sbuf, "\n"); 340 } 341 error = sbuf_finish(&sbuf); 342 sbuf_delete(&sbuf); 343 return (error); 344 } 345 #endif 346 347 static void 348 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) 349 { 350 351 m->order = order; 352 if (tail) 353 TAILQ_INSERT_TAIL(&fl[order].pl, m, listq); 354 else 355 TAILQ_INSERT_HEAD(&fl[order].pl, m, listq); 356 fl[order].lcnt++; 357 } 358 359 static void 360 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order) 361 { 362 363 TAILQ_REMOVE(&fl[order].pl, m, listq); 364 fl[order].lcnt--; 365 m->order = VM_NFREEORDER; 366 } 367 368 /* 369 * Create a physical memory segment. 
370 */ 371 static void 372 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain) 373 { 374 struct vm_phys_seg *seg; 375 376 KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 377 ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 378 KASSERT(domain >= 0 && domain < vm_ndomains, 379 ("vm_phys_create_seg: invalid domain provided")); 380 seg = &vm_phys_segs[vm_phys_nsegs++]; 381 while (seg > vm_phys_segs && (seg - 1)->start >= end) { 382 *seg = *(seg - 1); 383 seg--; 384 } 385 seg->start = start; 386 seg->end = end; 387 seg->domain = domain; 388 } 389 390 static void 391 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end) 392 { 393 #ifdef NUMA 394 int i; 395 396 if (mem_affinity == NULL) { 397 _vm_phys_create_seg(start, end, 0); 398 return; 399 } 400 401 for (i = 0;; i++) { 402 if (mem_affinity[i].end == 0) 403 panic("Reached end of affinity info"); 404 if (mem_affinity[i].end <= start) 405 continue; 406 if (mem_affinity[i].start > start) 407 panic("No affinity info for start %jx", 408 (uintmax_t)start); 409 if (mem_affinity[i].end >= end) { 410 _vm_phys_create_seg(start, end, 411 mem_affinity[i].domain); 412 break; 413 } 414 _vm_phys_create_seg(start, mem_affinity[i].end, 415 mem_affinity[i].domain); 416 start = mem_affinity[i].end; 417 } 418 #else 419 _vm_phys_create_seg(start, end, 0); 420 #endif 421 } 422 423 /* 424 * Add a physical memory segment. 425 */ 426 void 427 vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) 428 { 429 vm_paddr_t paddr; 430 431 KASSERT((start & PAGE_MASK) == 0, 432 ("vm_phys_define_seg: start is not page aligned")); 433 KASSERT((end & PAGE_MASK) == 0, 434 ("vm_phys_define_seg: end is not page aligned")); 435 436 /* 437 * Split the physical memory segment if it spans two or more free 438 * list boundaries. 
439 */ 440 paddr = start; 441 #ifdef VM_FREELIST_LOWMEM 442 if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { 443 vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); 444 paddr = VM_LOWMEM_BOUNDARY; 445 } 446 #endif 447 #ifdef VM_FREELIST_DMA32 448 if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) { 449 vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY); 450 paddr = VM_DMA32_BOUNDARY; 451 } 452 #endif 453 vm_phys_create_seg(paddr, end); 454 } 455 456 /* 457 * Initialize the physical memory allocator. 458 * 459 * Requires that vm_page_array is initialized! 460 */ 461 void 462 vm_phys_init(void) 463 { 464 struct vm_freelist *fl; 465 struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg; 466 u_long npages; 467 int dom, flind, freelist, oind, pind, segind; 468 469 /* 470 * Compute the number of free lists, and generate the mapping from the 471 * manifest constants VM_FREELIST_* to the free list indices. 472 * 473 * Initially, the entries of vm_freelist_to_flind[] are set to either 474 * 0 or 1 to indicate which free lists should be created. 475 */ 476 npages = 0; 477 for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 478 seg = &vm_phys_segs[segind]; 479 #ifdef VM_FREELIST_LOWMEM 480 if (seg->end <= VM_LOWMEM_BOUNDARY) 481 vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; 482 else 483 #endif 484 #ifdef VM_FREELIST_DMA32 485 if ( 486 #ifdef VM_DMA32_NPAGES_THRESHOLD 487 /* 488 * Create the DMA32 free list only if the amount of 489 * physical memory above physical address 4G exceeds the 490 * given threshold. 491 */ 492 npages > VM_DMA32_NPAGES_THRESHOLD && 493 #endif 494 seg->end <= VM_DMA32_BOUNDARY) 495 vm_freelist_to_flind[VM_FREELIST_DMA32] = 1; 496 else 497 #endif 498 { 499 npages += atop(seg->end - seg->start); 500 vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1; 501 } 502 } 503 /* Change each entry into a running total of the free lists. 
*/ 504 for (freelist = 1; freelist < VM_NFREELIST; freelist++) { 505 vm_freelist_to_flind[freelist] += 506 vm_freelist_to_flind[freelist - 1]; 507 } 508 vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1]; 509 KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists")); 510 /* Change each entry into a free list index. */ 511 for (freelist = 0; freelist < VM_NFREELIST; freelist++) 512 vm_freelist_to_flind[freelist]--; 513 514 /* 515 * Initialize the first_page and free_queues fields of each physical 516 * memory segment. 517 */ 518 #ifdef VM_PHYSSEG_SPARSE 519 npages = 0; 520 #endif 521 for (segind = 0; segind < vm_phys_nsegs; segind++) { 522 seg = &vm_phys_segs[segind]; 523 #ifdef VM_PHYSSEG_SPARSE 524 seg->first_page = &vm_page_array[npages]; 525 npages += atop(seg->end - seg->start); 526 #else 527 seg->first_page = PHYS_TO_VM_PAGE(seg->start); 528 #endif 529 #ifdef VM_FREELIST_LOWMEM 530 if (seg->end <= VM_LOWMEM_BOUNDARY) { 531 flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; 532 KASSERT(flind >= 0, 533 ("vm_phys_init: LOWMEM flind < 0")); 534 } else 535 #endif 536 #ifdef VM_FREELIST_DMA32 537 if (seg->end <= VM_DMA32_BOUNDARY) { 538 flind = vm_freelist_to_flind[VM_FREELIST_DMA32]; 539 KASSERT(flind >= 0, 540 ("vm_phys_init: DMA32 flind < 0")); 541 } else 542 #endif 543 { 544 flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT]; 545 KASSERT(flind >= 0, 546 ("vm_phys_init: DEFAULT flind < 0")); 547 } 548 seg->free_queues = &vm_phys_free_queues[seg->domain][flind]; 549 } 550 551 /* 552 * Coalesce physical memory segments that are contiguous and share the 553 * same per-domain free queues. 
554 */ 555 prev_seg = vm_phys_segs; 556 seg = &vm_phys_segs[1]; 557 end_seg = &vm_phys_segs[vm_phys_nsegs]; 558 while (seg < end_seg) { 559 if (prev_seg->end == seg->start && 560 prev_seg->free_queues == seg->free_queues) { 561 prev_seg->end = seg->end; 562 KASSERT(prev_seg->domain == seg->domain, 563 ("vm_phys_init: free queues cannot span domains")); 564 vm_phys_nsegs--; 565 end_seg--; 566 for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++) 567 *tmp_seg = *(tmp_seg + 1); 568 } else { 569 prev_seg = seg; 570 seg++; 571 } 572 } 573 574 /* 575 * Initialize the free queues. 576 */ 577 for (dom = 0; dom < vm_ndomains; dom++) { 578 for (flind = 0; flind < vm_nfreelists; flind++) { 579 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 580 fl = vm_phys_free_queues[dom][flind][pind]; 581 for (oind = 0; oind < VM_NFREEORDER; oind++) 582 TAILQ_INIT(&fl[oind].pl); 583 } 584 } 585 } 586 587 rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); 588 } 589 590 /* 591 * Register info about the NUMA topology of the system. 592 * 593 * Invoked by platform-dependent code prior to vm_phys_init(). 594 */ 595 void 596 vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, 597 int *locality) 598 { 599 #ifdef NUMA 600 int d, i; 601 602 /* 603 * For now the only override value that we support is 1, which 604 * effectively disables NUMA-awareness in the allocators. 605 */ 606 d = 0; 607 TUNABLE_INT_FETCH("vm.numa.disabled", &d); 608 if (d) 609 ndomains = 1; 610 611 if (ndomains > 1) { 612 vm_ndomains = ndomains; 613 mem_affinity = affinity; 614 mem_locality = locality; 615 } 616 617 for (i = 0; i < vm_ndomains; i++) 618 DOMAINSET_SET(i, &all_domains); 619 #else 620 (void)ndomains; 621 (void)affinity; 622 (void)locality; 623 #endif 624 } 625 626 /* 627 * Split a contiguous, power of two-sized set of physical pages. 
628 * 629 * When this function is called by a page allocation function, the caller 630 * should request insertion at the head unless the order [order, oind) queues 631 * are known to be empty. The objective being to reduce the likelihood of 632 * long-term fragmentation by promoting contemporaneous allocation and 633 * (hopefully) deallocation. 634 */ 635 static __inline void 636 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, 637 int tail) 638 { 639 vm_page_t m_buddy; 640 641 while (oind > order) { 642 oind--; 643 m_buddy = &m[1 << oind]; 644 KASSERT(m_buddy->order == VM_NFREEORDER, 645 ("vm_phys_split_pages: page %p has unexpected order %d", 646 m_buddy, m_buddy->order)); 647 vm_freelist_add(fl, m_buddy, oind, tail); 648 } 649 } 650 651 /* 652 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned 653 * and sized set to the specified free list. 654 * 655 * When this function is called by a page allocation function, the caller 656 * should request insertion at the head unless the lower-order queues are 657 * known to be empty. The objective being to reduce the likelihood of long- 658 * term fragmentation by promoting contemporaneous allocation and (hopefully) 659 * deallocation. 660 * 661 * The physical page m's buddy must not be free. 
662 */ 663 static void 664 vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) 665 { 666 u_int n; 667 int order; 668 669 KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0")); 670 KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) & 671 ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0, 672 ("vm_phys_enq_range: page %p and npages %u are misaligned", 673 m, npages)); 674 do { 675 KASSERT(m->order == VM_NFREEORDER, 676 ("vm_phys_enq_range: page %p has unexpected order %d", 677 m, m->order)); 678 order = ffs(npages) - 1; 679 KASSERT(order < VM_NFREEORDER, 680 ("vm_phys_enq_range: order %d is out of range", order)); 681 vm_freelist_add(fl, m, order, tail); 682 n = 1 << order; 683 m += n; 684 npages -= n; 685 } while (npages > 0); 686 } 687 688 /* 689 * Tries to allocate the specified number of pages from the specified pool 690 * within the specified domain. Returns the actual number of allocated pages 691 * and a pointer to each page through the array ma[]. 692 * 693 * The returned pages may not be physically contiguous. However, in contrast 694 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0), 695 * calling this function once to allocate the desired number of pages will 696 * avoid wasted time in vm_phys_split_pages(). 697 * 698 * The free page queues for the specified domain must be locked. 
699 */ 700 int 701 vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) 702 { 703 struct vm_freelist *alt, *fl; 704 vm_page_t m; 705 int avail, end, flind, freelist, i, need, oind, pind; 706 707 KASSERT(domain >= 0 && domain < vm_ndomains, 708 ("vm_phys_alloc_npages: domain %d is out of range", domain)); 709 KASSERT(pool < VM_NFREEPOOL, 710 ("vm_phys_alloc_npages: pool %d is out of range", pool)); 711 KASSERT(npages <= 1 << (VM_NFREEORDER - 1), 712 ("vm_phys_alloc_npages: npages %d is out of range", npages)); 713 vm_domain_free_assert_locked(VM_DOMAIN(domain)); 714 i = 0; 715 for (freelist = 0; freelist < VM_NFREELIST; freelist++) { 716 flind = vm_freelist_to_flind[freelist]; 717 if (flind < 0) 718 continue; 719 fl = vm_phys_free_queues[domain][flind][pool]; 720 for (oind = 0; oind < VM_NFREEORDER; oind++) { 721 while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) { 722 vm_freelist_rem(fl, m, oind); 723 avail = 1 << oind; 724 need = imin(npages - i, avail); 725 for (end = i + need; i < end;) 726 ma[i++] = m++; 727 if (need < avail) { 728 /* 729 * Return excess pages to fl. Its 730 * order [0, oind) queues are empty. 731 */ 732 vm_phys_enq_range(m, avail - need, fl, 733 1); 734 return (npages); 735 } else if (i == npages) 736 return (npages); 737 } 738 } 739 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 740 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 741 alt = vm_phys_free_queues[domain][flind][pind]; 742 while ((m = TAILQ_FIRST(&alt[oind].pl)) != 743 NULL) { 744 vm_freelist_rem(alt, m, oind); 745 vm_phys_set_pool(pool, m, oind); 746 avail = 1 << oind; 747 need = imin(npages - i, avail); 748 for (end = i + need; i < end;) 749 ma[i++] = m++; 750 if (need < avail) { 751 /* 752 * Return excess pages to fl. 753 * Its order [0, oind) queues 754 * are empty. 
755 */ 756 vm_phys_enq_range(m, avail - 757 need, fl, 1); 758 return (npages); 759 } else if (i == npages) 760 return (npages); 761 } 762 } 763 } 764 } 765 return (i); 766 } 767 768 /* 769 * Allocate a contiguous, power of two-sized set of physical pages 770 * from the free lists. 771 * 772 * The free page queues must be locked. 773 */ 774 vm_page_t 775 vm_phys_alloc_pages(int domain, int pool, int order) 776 { 777 vm_page_t m; 778 int freelist; 779 780 for (freelist = 0; freelist < VM_NFREELIST; freelist++) { 781 m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order); 782 if (m != NULL) 783 return (m); 784 } 785 return (NULL); 786 } 787 788 /* 789 * Allocate a contiguous, power of two-sized set of physical pages from the 790 * specified free list. The free list must be specified using one of the 791 * manifest constants VM_FREELIST_*. 792 * 793 * The free page queues must be locked. 794 */ 795 vm_page_t 796 vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order) 797 { 798 struct vm_freelist *alt, *fl; 799 vm_page_t m; 800 int oind, pind, flind; 801 802 KASSERT(domain >= 0 && domain < vm_ndomains, 803 ("vm_phys_alloc_freelist_pages: domain %d is out of range", 804 domain)); 805 KASSERT(freelist < VM_NFREELIST, 806 ("vm_phys_alloc_freelist_pages: freelist %d is out of range", 807 freelist)); 808 KASSERT(pool < VM_NFREEPOOL, 809 ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); 810 KASSERT(order < VM_NFREEORDER, 811 ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 812 813 flind = vm_freelist_to_flind[freelist]; 814 /* Check if freelist is present */ 815 if (flind < 0) 816 return (NULL); 817 818 vm_domain_free_assert_locked(VM_DOMAIN(domain)); 819 fl = &vm_phys_free_queues[domain][flind][pool][0]; 820 for (oind = order; oind < VM_NFREEORDER; oind++) { 821 m = TAILQ_FIRST(&fl[oind].pl); 822 if (m != NULL) { 823 vm_freelist_rem(fl, m, oind); 824 /* The order [order, oind) queues are empty. 
*/ 825 vm_phys_split_pages(m, oind, fl, order, 1); 826 return (m); 827 } 828 } 829 830 /* 831 * The given pool was empty. Find the largest 832 * contiguous, power-of-two-sized set of pages in any 833 * pool. Transfer these pages to the given pool, and 834 * use them to satisfy the allocation. 835 */ 836 for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 837 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 838 alt = &vm_phys_free_queues[domain][flind][pind][0]; 839 m = TAILQ_FIRST(&alt[oind].pl); 840 if (m != NULL) { 841 vm_freelist_rem(alt, m, oind); 842 vm_phys_set_pool(pool, m, oind); 843 /* The order [order, oind) queues are empty. */ 844 vm_phys_split_pages(m, oind, fl, order, 1); 845 return (m); 846 } 847 } 848 } 849 return (NULL); 850 } 851 852 /* 853 * Find the vm_page corresponding to the given physical address. 854 */ 855 vm_page_t 856 vm_phys_paddr_to_vm_page(vm_paddr_t pa) 857 { 858 struct vm_phys_seg *seg; 859 int segind; 860 861 for (segind = 0; segind < vm_phys_nsegs; segind++) { 862 seg = &vm_phys_segs[segind]; 863 if (pa >= seg->start && pa < seg->end) 864 return (&seg->first_page[atop(pa - seg->start)]); 865 } 866 return (NULL); 867 } 868 869 vm_page_t 870 vm_phys_fictitious_to_vm_page(vm_paddr_t pa) 871 { 872 struct vm_phys_fictitious_seg tmp, *seg; 873 vm_page_t m; 874 875 m = NULL; 876 tmp.start = pa; 877 tmp.end = 0; 878 879 rw_rlock(&vm_phys_fictitious_reg_lock); 880 seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 881 rw_runlock(&vm_phys_fictitious_reg_lock); 882 if (seg == NULL) 883 return (NULL); 884 885 m = &seg->first_page[atop(pa - seg->start)]; 886 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); 887 888 return (m); 889 } 890 891 static inline void 892 vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, 893 long page_count, vm_memattr_t memattr) 894 { 895 long i; 896 897 bzero(range, page_count * sizeof(*range)); 898 for (i = 0; i < page_count; i++) { 899 vm_page_initfake(&range[i], start + 
PAGE_SIZE * i, memattr); 900 range[i].oflags &= ~VPO_UNMANAGED; 901 range[i].busy_lock = VPB_UNBUSIED; 902 } 903 } 904 905 int 906 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, 907 vm_memattr_t memattr) 908 { 909 struct vm_phys_fictitious_seg *seg; 910 vm_page_t fp; 911 long page_count; 912 #ifdef VM_PHYSSEG_DENSE 913 long pi, pe; 914 long dpage_count; 915 #endif 916 917 KASSERT(start < end, 918 ("Start of segment isn't less than end (start: %jx end: %jx)", 919 (uintmax_t)start, (uintmax_t)end)); 920 921 page_count = (end - start) / PAGE_SIZE; 922 923 #ifdef VM_PHYSSEG_DENSE 924 pi = atop(start); 925 pe = atop(end); 926 if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 927 fp = &vm_page_array[pi - first_page]; 928 if ((pe - first_page) > vm_page_array_size) { 929 /* 930 * We have a segment that starts inside 931 * of vm_page_array, but ends outside of it. 932 * 933 * Use vm_page_array pages for those that are 934 * inside of the vm_page_array range, and 935 * allocate the remaining ones. 936 */ 937 dpage_count = vm_page_array_size - (pi - first_page); 938 vm_phys_fictitious_init_range(fp, start, dpage_count, 939 memattr); 940 page_count -= dpage_count; 941 start += ptoa(dpage_count); 942 goto alloc; 943 } 944 /* 945 * We can allocate the full range from vm_page_array, 946 * so there's no need to register the range in the tree. 947 */ 948 vm_phys_fictitious_init_range(fp, start, page_count, memattr); 949 return (0); 950 } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 951 /* 952 * We have a segment that ends inside of vm_page_array, 953 * but starts outside of it. 
954 */ 955 fp = &vm_page_array[0]; 956 dpage_count = pe - first_page; 957 vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, 958 memattr); 959 end -= ptoa(dpage_count); 960 page_count -= dpage_count; 961 goto alloc; 962 } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 963 /* 964 * Trying to register a fictitious range that expands before 965 * and after vm_page_array. 966 */ 967 return (EINVAL); 968 } else { 969 alloc: 970 #endif 971 fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 972 M_WAITOK); 973 #ifdef VM_PHYSSEG_DENSE 974 } 975 #endif 976 vm_phys_fictitious_init_range(fp, start, page_count, memattr); 977 978 seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 979 seg->start = start; 980 seg->end = end; 981 seg->first_page = fp; 982 983 rw_wlock(&vm_phys_fictitious_reg_lock); 984 RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 985 rw_wunlock(&vm_phys_fictitious_reg_lock); 986 987 return (0); 988 } 989 990 void 991 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 992 { 993 struct vm_phys_fictitious_seg *seg, tmp; 994 #ifdef VM_PHYSSEG_DENSE 995 long pi, pe; 996 #endif 997 998 KASSERT(start < end, 999 ("Start of segment isn't less than end (start: %jx end: %jx)", 1000 (uintmax_t)start, (uintmax_t)end)); 1001 1002 #ifdef VM_PHYSSEG_DENSE 1003 pi = atop(start); 1004 pe = atop(end); 1005 if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 1006 if ((pe - first_page) <= vm_page_array_size) { 1007 /* 1008 * This segment was allocated using vm_page_array 1009 * only, there's nothing to do since those pages 1010 * were never added to the tree. 1011 */ 1012 return; 1013 } 1014 /* 1015 * We have a segment that starts inside 1016 * of vm_page_array, but ends outside of it. 1017 * 1018 * Calculate how many pages were added to the 1019 * tree and free them. 
1020 */ 1021 start = ptoa(first_page + vm_page_array_size); 1022 } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 1023 /* 1024 * We have a segment that ends inside of vm_page_array, 1025 * but starts outside of it. 1026 */ 1027 end = ptoa(first_page); 1028 } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 1029 /* Since it's not possible to register such a range, panic. */ 1030 panic( 1031 "Unregistering not registered fictitious range [%#jx:%#jx]", 1032 (uintmax_t)start, (uintmax_t)end); 1033 } 1034 #endif 1035 tmp.start = start; 1036 tmp.end = 0; 1037 1038 rw_wlock(&vm_phys_fictitious_reg_lock); 1039 seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 1040 if (seg->start != start || seg->end != end) { 1041 rw_wunlock(&vm_phys_fictitious_reg_lock); 1042 panic( 1043 "Unregistering not registered fictitious range [%#jx:%#jx]", 1044 (uintmax_t)start, (uintmax_t)end); 1045 } 1046 RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 1047 rw_wunlock(&vm_phys_fictitious_reg_lock); 1048 free(seg->first_page, M_FICT_PAGES); 1049 free(seg, M_FICT_PAGES); 1050 } 1051 1052 /* 1053 * Free a contiguous, power of two-sized set of physical pages. 1054 * 1055 * The free page queues must be locked. 
1056 */ 1057 void 1058 vm_phys_free_pages(vm_page_t m, int order) 1059 { 1060 struct vm_freelist *fl; 1061 struct vm_phys_seg *seg; 1062 vm_paddr_t pa; 1063 vm_page_t m_buddy; 1064 1065 KASSERT(m->order == VM_NFREEORDER, 1066 ("vm_phys_free_pages: page %p has unexpected order %d", 1067 m, m->order)); 1068 KASSERT(m->pool < VM_NFREEPOOL, 1069 ("vm_phys_free_pages: page %p has unexpected pool %d", 1070 m, m->pool)); 1071 KASSERT(order < VM_NFREEORDER, 1072 ("vm_phys_free_pages: order %d is out of range", order)); 1073 seg = &vm_phys_segs[m->segind]; 1074 vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1075 if (order < VM_NFREEORDER - 1) { 1076 pa = VM_PAGE_TO_PHYS(m); 1077 do { 1078 pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); 1079 if (pa < seg->start || pa >= seg->end) 1080 break; 1081 m_buddy = &seg->first_page[atop(pa - seg->start)]; 1082 if (m_buddy->order != order) 1083 break; 1084 fl = (*seg->free_queues)[m_buddy->pool]; 1085 vm_freelist_rem(fl, m_buddy, order); 1086 if (m_buddy->pool != m->pool) 1087 vm_phys_set_pool(m->pool, m_buddy, order); 1088 order++; 1089 pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); 1090 m = &seg->first_page[atop(pa - seg->start)]; 1091 } while (order < VM_NFREEORDER - 1); 1092 } 1093 fl = (*seg->free_queues)[m->pool]; 1094 vm_freelist_add(fl, m, order, 1); 1095 } 1096 1097 /* 1098 * Return the largest possible order of a set of pages starting at m. 1099 */ 1100 static int 1101 max_order(vm_page_t m) 1102 { 1103 1104 /* 1105 * Unsigned "min" is used here so that "order" is assigned 1106 * "VM_NFREEORDER - 1" when "m"'s physical address is zero 1107 * or the low-order bits of its physical address are zero 1108 * because the size of a physical address exceeds the size of 1109 * a long. 1110 */ 1111 return (min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, 1112 VM_NFREEORDER - 1)); 1113 } 1114 1115 /* 1116 * Free a contiguous, arbitrarily sized set of physical pages, without 1117 * merging across set boundaries. 
1118 * 1119 * The free page queues must be locked. 1120 */ 1121 void 1122 vm_phys_enqueue_contig(vm_page_t m, u_long npages) 1123 { 1124 struct vm_freelist *fl; 1125 struct vm_phys_seg *seg; 1126 vm_page_t m_end; 1127 int order; 1128 1129 /* 1130 * Avoid unnecessary coalescing by freeing the pages in the largest 1131 * possible power-of-two-sized subsets. 1132 */ 1133 vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1134 seg = &vm_phys_segs[m->segind]; 1135 fl = (*seg->free_queues)[m->pool]; 1136 m_end = m + npages; 1137 /* Free blocks of increasing size. */ 1138 while ((order = max_order(m)) < VM_NFREEORDER - 1 && 1139 m + (1 << order) <= m_end) { 1140 KASSERT(seg == &vm_phys_segs[m->segind], 1141 ("%s: page range [%p,%p) spans multiple segments", 1142 __func__, m_end - npages, m)); 1143 vm_freelist_add(fl, m, order, 1); 1144 m += 1 << order; 1145 } 1146 /* Free blocks of maximum size. */ 1147 while (m + (1 << order) <= m_end) { 1148 KASSERT(seg == &vm_phys_segs[m->segind], 1149 ("%s: page range [%p,%p) spans multiple segments", 1150 __func__, m_end - npages, m)); 1151 vm_freelist_add(fl, m, order, 1); 1152 m += 1 << order; 1153 } 1154 /* Free blocks of diminishing size. */ 1155 while (m < m_end) { 1156 KASSERT(seg == &vm_phys_segs[m->segind], 1157 ("%s: page range [%p,%p) spans multiple segments", 1158 __func__, m_end - npages, m)); 1159 order = flsl(m_end - m) - 1; 1160 vm_freelist_add(fl, m, order, 1); 1161 m += 1 << order; 1162 } 1163 } 1164 1165 /* 1166 * Free a contiguous, arbitrarily sized set of physical pages. 1167 * 1168 * The free page queues must be locked. 
1169 */ 1170 void 1171 vm_phys_free_contig(vm_page_t m, u_long npages) 1172 { 1173 int order_start, order_end; 1174 vm_page_t m_start, m_end; 1175 1176 vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1177 1178 m_start = m; 1179 order_start = max_order(m_start); 1180 if (order_start < VM_NFREEORDER - 1) 1181 m_start += 1 << order_start; 1182 m_end = m + npages; 1183 order_end = max_order(m_end); 1184 if (order_end < VM_NFREEORDER - 1) 1185 m_end -= 1 << order_end; 1186 /* 1187 * Avoid unnecessary coalescing by freeing the pages at the start and 1188 * end of the range last. 1189 */ 1190 if (m_start < m_end) 1191 vm_phys_enqueue_contig(m_start, m_end - m_start); 1192 if (order_start < VM_NFREEORDER - 1) 1193 vm_phys_free_pages(m, order_start); 1194 if (order_end < VM_NFREEORDER - 1) 1195 vm_phys_free_pages(m_end, order_end); 1196 } 1197 1198 /* 1199 * Scan physical memory between the specified addresses "low" and "high" for a 1200 * run of contiguous physical pages that satisfy the specified conditions, and 1201 * return the lowest page in the run. The specified "alignment" determines 1202 * the alignment of the lowest physical page in the run. If the specified 1203 * "boundary" is non-zero, then the run of physical pages cannot span a 1204 * physical address that is a multiple of "boundary". 1205 * 1206 * "npages" must be greater than zero. Both "alignment" and "boundary" must 1207 * be a power of two. 
1208 */ 1209 vm_page_t 1210 vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 1211 u_long alignment, vm_paddr_t boundary, int options) 1212 { 1213 vm_paddr_t pa_end; 1214 vm_page_t m_end, m_run, m_start; 1215 struct vm_phys_seg *seg; 1216 int segind; 1217 1218 KASSERT(npages > 0, ("npages is 0")); 1219 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1220 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1221 if (low >= high) 1222 return (NULL); 1223 for (segind = 0; segind < vm_phys_nsegs; segind++) { 1224 seg = &vm_phys_segs[segind]; 1225 if (seg->domain != domain) 1226 continue; 1227 if (seg->start >= high) 1228 break; 1229 if (low >= seg->end) 1230 continue; 1231 if (low <= seg->start) 1232 m_start = seg->first_page; 1233 else 1234 m_start = &seg->first_page[atop(low - seg->start)]; 1235 if (high < seg->end) 1236 pa_end = high; 1237 else 1238 pa_end = seg->end; 1239 if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages)) 1240 continue; 1241 m_end = &seg->first_page[atop(pa_end - seg->start)]; 1242 m_run = vm_page_scan_contig(npages, m_start, m_end, 1243 alignment, boundary, options); 1244 if (m_run != NULL) 1245 return (m_run); 1246 } 1247 return (NULL); 1248 } 1249 1250 /* 1251 * Set the pool for a contiguous, power of two-sized set of physical pages. 1252 */ 1253 void 1254 vm_phys_set_pool(int pool, vm_page_t m, int order) 1255 { 1256 vm_page_t m_tmp; 1257 1258 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 1259 m_tmp->pool = pool; 1260 } 1261 1262 /* 1263 * Search for the given physical page "m" in the free lists. If the search 1264 * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 1265 * FALSE, indicating that "m" is not in the free lists. 1266 * 1267 * The free page queues must be locked. 
1268 */ 1269 boolean_t 1270 vm_phys_unfree_page(vm_page_t m) 1271 { 1272 struct vm_freelist *fl; 1273 struct vm_phys_seg *seg; 1274 vm_paddr_t pa, pa_half; 1275 vm_page_t m_set, m_tmp; 1276 int order; 1277 1278 /* 1279 * First, find the contiguous, power of two-sized set of free 1280 * physical pages containing the given physical page "m" and 1281 * assign it to "m_set". 1282 */ 1283 seg = &vm_phys_segs[m->segind]; 1284 vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1285 for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1286 order < VM_NFREEORDER - 1; ) { 1287 order++; 1288 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 1289 if (pa >= seg->start) 1290 m_set = &seg->first_page[atop(pa - seg->start)]; 1291 else 1292 return (FALSE); 1293 } 1294 if (m_set->order < order) 1295 return (FALSE); 1296 if (m_set->order == VM_NFREEORDER) 1297 return (FALSE); 1298 KASSERT(m_set->order < VM_NFREEORDER, 1299 ("vm_phys_unfree_page: page %p has unexpected order %d", 1300 m_set, m_set->order)); 1301 1302 /* 1303 * Next, remove "m_set" from the free lists. Finally, extract 1304 * "m" from "m_set" using an iterative algorithm: While "m_set" 1305 * is larger than a page, shrink "m_set" by returning the half 1306 * of "m_set" that does not contain "m" to the free lists. 1307 */ 1308 fl = (*seg->free_queues)[m_set->pool]; 1309 order = m_set->order; 1310 vm_freelist_rem(fl, m_set, order); 1311 while (order > 0) { 1312 order--; 1313 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 1314 if (m->phys_addr < pa_half) 1315 m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 1316 else { 1317 m_tmp = m_set; 1318 m_set = &seg->first_page[atop(pa_half - seg->start)]; 1319 } 1320 vm_freelist_add(fl, m_tmp, order, 0); 1321 } 1322 KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 1323 return (TRUE); 1324 } 1325 1326 /* 1327 * Allocate a contiguous set of physical pages of the given size 1328 * "npages" from the free lists. 
All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_phys_seg *seg;
	vm_paddr_t hi, lo;
	vm_page_t run;
	int i;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);

	/* Try the segments from the highest index downwards. */
	for (i = vm_phys_nsegs - 1; i >= 0; i--) {
		seg = &vm_phys_segs[i];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;

		/* Skip the segment if its overlap with [low, high) is small. */
		lo = low <= seg->start ? seg->start : low;
		hi = high < seg->end ? high : seg->end;
		if (hi - lo < ptoa(npages))
			continue;

		run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (run != NULL)
			return (run);
	}
	return (NULL);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
1379 */ 1380 static vm_page_t 1381 vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, 1382 vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 1383 { 1384 struct vm_freelist *fl; 1385 vm_paddr_t pa, pa_end, size; 1386 vm_page_t m, m_ret; 1387 u_long npages_end; 1388 int oind, order, pind; 1389 1390 KASSERT(npages > 0, ("npages is 0")); 1391 KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1392 KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1393 vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1394 /* Compute the queue that is the best fit for npages. */ 1395 order = flsl(npages - 1); 1396 /* Search for a run satisfying the specified conditions. */ 1397 size = npages << PAGE_SHIFT; 1398 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; 1399 oind++) { 1400 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 1401 fl = (*seg->free_queues)[pind]; 1402 TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { 1403 /* 1404 * Is the size of this allocation request 1405 * larger than the largest block size? 1406 */ 1407 if (order >= VM_NFREEORDER) { 1408 /* 1409 * Determine if a sufficient number of 1410 * subsequent blocks to satisfy the 1411 * allocation request are free. 1412 */ 1413 pa = VM_PAGE_TO_PHYS(m_ret); 1414 pa_end = pa + size; 1415 if (pa_end < pa) 1416 continue; 1417 for (;;) { 1418 pa += 1 << (PAGE_SHIFT + 1419 VM_NFREEORDER - 1); 1420 if (pa >= pa_end || 1421 pa < seg->start || 1422 pa >= seg->end) 1423 break; 1424 m = &seg->first_page[atop(pa - 1425 seg->start)]; 1426 if (m->order != VM_NFREEORDER - 1427 1) 1428 break; 1429 } 1430 /* If not, go to the next block. */ 1431 if (pa < pa_end) 1432 continue; 1433 } 1434 1435 /* 1436 * Determine if the blocks are within the 1437 * given range, satisfy the given alignment, 1438 * and do not cross the given boundary. 
1439 */ 1440 pa = VM_PAGE_TO_PHYS(m_ret); 1441 pa_end = pa + size; 1442 if (pa >= low && pa_end <= high && 1443 (pa & (alignment - 1)) == 0 && 1444 rounddown2(pa ^ (pa_end - 1), boundary) == 0) 1445 goto done; 1446 } 1447 } 1448 } 1449 return (NULL); 1450 done: 1451 for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 1452 fl = (*seg->free_queues)[m->pool]; 1453 vm_freelist_rem(fl, m, oind); 1454 if (m->pool != VM_FREEPOOL_DEFAULT) 1455 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind); 1456 } 1457 /* Return excess pages to the free lists. */ 1458 npages_end = roundup2(npages, 1 << oind); 1459 if (npages < npages_end) { 1460 fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT]; 1461 vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0); 1462 } 1463 return (m_ret); 1464 } 1465 1466 #ifdef DDB 1467 /* 1468 * Show the number of physical pages in each of the free lists. 1469 */ 1470 DB_SHOW_COMMAND(freepages, db_show_freepages) 1471 { 1472 struct vm_freelist *fl; 1473 int flind, oind, pind, dom; 1474 1475 for (dom = 0; dom < vm_ndomains; dom++) { 1476 db_printf("DOMAIN: %d\n", dom); 1477 for (flind = 0; flind < vm_nfreelists; flind++) { 1478 db_printf("FREE LIST %d:\n" 1479 "\n ORDER (SIZE) | NUMBER" 1480 "\n ", flind); 1481 for (pind = 0; pind < VM_NFREEPOOL; pind++) 1482 db_printf(" | POOL %d", pind); 1483 db_printf("\n-- "); 1484 for (pind = 0; pind < VM_NFREEPOOL; pind++) 1485 db_printf("-- -- "); 1486 db_printf("--\n"); 1487 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 1488 db_printf(" %2.2d (%6.6dK)", oind, 1489 1 << (PAGE_SHIFT - 10 + oind)); 1490 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 1491 fl = vm_phys_free_queues[dom][flind][pind]; 1492 db_printf(" | %6.6d", fl[oind].lcnt); 1493 } 1494 db_printf("\n"); 1495 } 1496 db_printf("\n"); 1497 } 1498 db_printf("\n"); 1499 } 1500 } 1501 #endif 1502