/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
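 *
 * Roughly, this is a binary buddy allocator: free physical pages are kept in
 * per-domain, per-freelist, per-pool queues indexed by order, where the
 * queue of order "k" holds free blocks of 2^k contiguous, naturally aligned
 * pages (see vm_phys_free_queues[] below).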
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
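 * That is, when both optional boundaries are configured, the low-memory
 * boundary (used for legacy DMA) must lie below the 4 GB DMA32 boundary
 * defined above.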
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
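 * For example, "sysctl vm.phys_free" reports, for every domain and free
 * list, the number of free blocks of each order in each pool.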
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, " | POOL %d", pind);
			sbuf_printf(&sbuf, "\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- -- ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, " %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, " | %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
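 * Exported as "sysctl vm.phys_locality"; entry [i][j] is the relative
 * distance from domain i to domain j, or -1 if no locality information is
 * available (see vm_phys_mem_affinity() above).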
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
    int *locality)
{
#ifdef NUMA
	int d, i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	d = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &d);
	if (d)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);

	domainset_init();
#else
	(void)ndomains;
	(void)affinity;
	(void)locality;
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective being to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective being to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.
 * Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
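 *
 * For example, vm_phys_alloc_freelist_pages(0, VM_FREELIST_DEFAULT,
 * VM_FREEPOOL_DEFAULT, 4) attempts to take a 16-page (order 4) block from
 * domain 0's default free list, with that domain's free queue lock held.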
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
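 * Returns NULL if "pa" does not fall within any registered segment.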
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
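 *
 * The buddy of a 2^order page block at physical address "pa" is the block at
 * pa ^ (2^order * PAGE_SIZE).  For example, with 4 KB pages, the order 0
 * buddy of the page at 0x2000 is at 0x3000; if both are free they are merged
 * into an order 1 block at 0x2000, and the merge is retried at the next
 * order.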
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
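 *
 * For example, with 4 KB pages, npages = 16, alignment = 65536 and
 * boundary = 0 asks for a 64 KB run whose first page is 64 KB aligned and
 * that may lie anywhere in [low, high).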
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
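	 *
	 * For example, if "m" lies within an order 2 free block, the first
	 * pass returns the order 1 half that does not contain "m", the next
	 * pass returns the order 0 buddy of "m", and "m" itself remains
	 * removed from the free lists.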
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf(" | POOL %d", pind);
			db_printf("\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- -- ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf(" %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf(" | %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif