/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
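 *
 * Free physical pages are kept on buddy-allocator queues indexed by NUMA
 * domain, free list, pool, and block order (see vm_phys_free_queues[]).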
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t start;
	vm_paddr_t end;
	vm_page_t first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
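 * (On platforms that define both, the LOWMEM boundary must lie below the
 * 4GB DMA32 boundary.)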
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
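 *
 * One table is emitted per (domain, free list) pair, with a row for each
 * block order and a column for each pool; each entry is the count of free
 * blocks (fl[oind].lcnt) of that order in that pool.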
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, " | POOL %d", pind);
			sbuf_printf(&sbuf, "\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- -- ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, " %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, " | %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
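 *
 * Row i, column j gives vm_phys_mem_affinity(i, j), i.e. the relative
 * distance from domain i to domain j, or -1 when no locality data exists.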
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
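 *
 * Computes the VM_FREELIST_* to flind mapping, sets each segment's
 * first_page and per-domain free queues, coalesces adjacent segments that
 * share free queues, and finally initializes the (empty) free queues.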
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
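	 * Every (domain, free list, pool, order) queue starts out empty;
	 * pages are added later by vm_phys_free_pages() and
	 * vm_phys_free_contig().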
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
    int *locality)
{
#ifdef NUMA
	int d, i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	d = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &d);
	if (d)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);
#else
	(void)ndomains;
	(void)affinity;
	(void)locality;
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
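 *
 * Pages are gathered from the smallest available blocks first; any excess
 * from the last block taken is returned to the free lists by
 * vm_phys_enq_range().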
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
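 *
 * If the requested pool has no sufficiently large block, the allocation is
 * satisfied from another pool and the block is moved to the requested pool
 * with vm_phys_set_pool().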
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
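 *
 * Returns NULL if the address does not fall within any registered segment.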
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
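		 * This case is not supported, so reject the request.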
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
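 *
 * The block is coalesced with its buddy while the buddy is also free, up to
 * the maximum order, before being placed back on a free list.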
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
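 *
 * Returns NULL if no run satisfying the constraints is found.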
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
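	 *
	 * For example, extracting the page at relative index 5 from a free
	 * order-3 block returns an order-2 block (indices 0-3), an order-1
	 * block (indices 6-7), and an order-0 block (index 4) to the free
	 * lists, leaving only index 5 removed.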
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
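				 * If so, several consecutive maximum-order
				 * blocks are needed to satisfy it.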
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf(" | POOL %d", pind);
			db_printf("\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- -- ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf(" %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf(" | %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif