/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
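/*
 * For example (a sketch; the exact constants come from the platform's
 * <machine/vmparam.h>): an amd64 kernel with memory below 16M, between 16M
 * and 4G, and above 4G (and, where VM_DMA32_NPAGES_THRESHOLD is defined,
 * enough memory above 4G) creates all three lists, so vm_phys_init()
 * computes vm_nfreelists == 3 and the mapping
 *
 *	vm_freelist_to_flind[VM_FREELIST_DEFAULT] == 0
 *	vm_freelist_to_flind[VM_FREELIST_DMA32]   == 1
 *	vm_freelist_to_flind[VM_FREELIST_LOWMEM]  == 2
 *
 * A list that is not created either maps to -1 or aliases an earlier
 * index; the allocation routines skip negative entries.  See the running
 * total computation in vm_phys_init() below.
 */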
CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}
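/*
 * Worked example for vm_phys_domain_match() (a sketch, assuming a two-domain
 * machine whose mem_affinity[] maps [0, 4G) to domain 0 and [4G, 8G) to
 * domain 1): a call with prefer == -1, low == 5G, and high == 6G builds a
 * mask containing only domain 1 and returns 1; the same range with
 * prefer == 1 also returns 1, because the preferred domain satisfies the
 * constraint.
 */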
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
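/*
 * The locality values are typically the ACPI SLIT distances reported by the
 * platform, where (by convention) 10 means local and larger values mean more
 * distant; e.g., on a common two-socket machine, vm_phys_mem_affinity(0, 0)
 * returns 10 and vm_phys_mem_affinity(0, 1) returns something like 20.  The
 * sysctl handler below dumps this table.
 */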
#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
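/*
 * Example (a sketch, using the x86 values VM_LOWMEM_BOUNDARY == 16M and
 * VM_DMA32_BOUNDARY == 4G): vm_phys_add_seg(2M, 6G) creates three segments,
 * [2M, 16M), [16M, 4G), and [4G, 6G), so that each resulting segment lies
 * entirely within one free list.
 */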
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}
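	/*
	 * For example (a sketch): segments [16M, 1G) and [1G, 4G) in the same
	 * domain fall between the LOWMEM and DMA32 boundaries, share
	 * free_queues, and are therefore merged into a single segment
	 * [16M, 4G) by the loop above.
	 */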
	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
    int *locality)
{
#ifdef NUMA
	int i;

	vm_ndomains = ndomains;
	mem_affinity = affinity;
	mem_locality = locality;

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);

	domainset_init();
#else
	(void)ndomains;
	(void)affinity;
	(void)locality;
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}
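/*
 * Worked example for vm_phys_enq_range() (a sketch): with npages == 5 and
 * "m" positioned so that [m, m + 5) ends an 8-page aligned block, the loop
 * frees [m, m + 1) at order 0 (ffs(5) - 1 == 0) and then [m + 1, m + 5) at
 * order 2 (ffs(4) - 1 == 2), consuming npages from the low-order bits
 * upward.
 */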
/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}
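/*
 * Example usage of vm_phys_alloc_npages() (a sketch; the caller must hold
 * the domain's free queue lock, and "count" may be less than requested when
 * memory is scarce):
 *
 *	vm_page_t ma[16];
 *	int count;
 *
 *	vm_domain_free_lock(VM_DOMAIN(0));
 *	count = vm_phys_alloc_npages(0, VM_FREEPOOL_DEFAULT, 16, ma);
 *	vm_domain_free_unlock(VM_DOMAIN(0));
 *
 * The returned pages need not be physically contiguous.
 */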
/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}
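/*
 * For example (a sketch): if an order 2 block is requested from the
 * VM_FREEPOOL_DIRECT pool and that pool is empty, but the
 * VM_FREEPOOL_DEFAULT pool holds an order 4 block, the order 4 block is
 * moved to the DIRECT pool by vm_phys_set_pool() and split; the requested
 * order 2 piece is returned and the remainder is freed onto the DIRECT
 * pool's queues.
 */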
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg == NULL || seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}
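/*
 * Example usage (a sketch; the address, size, and memory attribute are
 * hypothetical): a driver exposing a 16M MMIO aperture at physical address
 * 0xd0000000 could register it with
 *
 *	error = vm_phys_fictitious_reg_range(0xd0000000, 0xd1000000,
 *	    VM_MEMATTR_WRITE_COMBINING);
 *
 * after which vm_phys_fictitious_to_vm_page() resolves addresses in the
 * range, and vm_phys_fictitious_unreg_range() with the same bounds tears it
 * down.
 */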
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
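/*
 * Worked example for vm_phys_free_contig() (a sketch): freeing npages == 11
 * starting at a 4-page aligned (but not 8-page aligned) address frees an
 * order 2 block (4 pages) in the first loop; the residual 7 pages are then
 * freed as order 2 (4 pages), order 1 (2 pages), and order 0 (1 page) blocks
 * by the second loop.
 */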
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}
/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
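/*
 * Worked example for vm_phys_unfree_page() (a sketch): if "m" is the third
 * page of a free order 2 block starting at physical address "pa", the block
 * is removed whole and the halving loop returns the order 1 lower half
 * [pa, pa + 2 pages) and then the order 0 buddy at pa + 3 pages to the free
 * lists, leaving exactly "m" extracted.
 */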
/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif