/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif
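/*
 * The free queues declared above are indexed as
 *
 *	vm_phys_free_queues[domain][flind][pool][order]
 *
 * where "flind" is obtained by translating a VM_FREELIST_* constant
 * through vm_freelist_to_flind[].  For example, the order-2 queue of the
 * default pool on the default free list of domain 0 is
 *
 *	vm_phys_free_queues[0][vm_freelist_to_flind[VM_FREELIST_DEFAULT]]
 *	    [VM_FREEPOOL_DEFAULT][2]
 */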
/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}
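/*
 * For example (illustrative only), a caller that prefers domain 1 but
 * needs pages in [0, 4GB) can use
 *
 *	dom = vm_phys_domain_match(1, 0, (vm_paddr_t)1 << 32);
 *
 * and gets 1 back only if domain 1 advertises memory overlapping that
 * range; otherwise it gets the lowest-numbered domain that does.  On
 * non-NUMA kernels the function always returns 0.
 */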
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, " | POOL %d", pind);
			sbuf_printf(&sbuf, "\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- -- ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, " %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, " | %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
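/*
 * The sysctls above are intended for inspection from userland; for
 * example (the values shown are made up, only the format follows the
 * sbuf output above):
 *
 *	$ sysctl vm.ndomains vm.phys_segs
 *	vm.ndomains: 1
 *	vm.phys_segs:
 *	SEGMENT 0:
 *
 *	start: 0x10000
 *	end: 0x9d000
 *	domain: 0
 *	free list: 0xffffffff81e42a40
 *	...
 */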
#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
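/*
 * A worked example (boundary values as configured on x86, shown only for
 * illustration): with VM_LOWMEM_BOUNDARY at 16MB and VM_DMA32_BOUNDARY at
 * 4GB, a call such as
 *
 *	vm_phys_add_seg(8 * 1024 * 1024, (vm_paddr_t)5 << 30);
 *
 * records three segments, [8MB, 16MB), [16MB, 4GB) and [4GB, 5GB), so that
 * no segment straddles a free list boundary.
 */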
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
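/*
 * A worked example of the index computation above (illustrative only):
 * if all three of the DEFAULT, DMA32 and LOWMEM lists end up flagged
 * with 1, the running totals become 1, 2, 3 and the final decrement
 * yields the indices 0, 1, 2, so vm_nfreelists is 3.  If one of the
 * optional lists is not flagged, its entry collapses onto the value of
 * the preceding entry and vm_nfreelists shrinks accordingly.
 */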
/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-term
 * fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}
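/*
 * A worked example (illustrative only): enqueueing npages = 3 pages that
 * sit at the end of a suitably aligned block frees them as an order-0
 * block followed by an order-1 block, since ffs(3) - 1 = 0 and then
 * ffs(2) - 1 = 1; the pages are queued from the lowest physical address
 * upward in blocks of strictly increasing order.
 */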
/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}
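/*
 * Illustrative (non-compiled) sketch of how a caller inside the VM system
 * might use these entry points; the locking helpers are the per-domain
 * free lock macros from vm_pagequeue.h, and the fallback policy shown is
 * only an example, not taken from this file:
 *
 *	vm_page_t ma[8];
 *	int got;
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	got = vm_phys_alloc_npages(domain, VM_FREEPOOL_DEFAULT, 8, ma);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 *	if (got < 8) {
 *		... return the "got" pages or retry after reclamation ...
 *	}
 */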
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}
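/*
 * Illustrative (non-compiled) sketch of the intended registration pattern;
 * the BAR variables, error handling, and the memory attribute are
 * placeholders chosen for the example, not taken from this file:
 *
 *	if (vm_phys_fictitious_reg_range(bar_start, bar_start + bar_len,
 *	    VM_MEMATTR_DEFAULT) != 0)
 *		return (ENOMEM);
 *	...
 *	m = vm_phys_fictitious_to_vm_page(bar_start + off);
 *	...
 *	vm_phys_fictitious_unreg_range(bar_start, bar_start + bar_len);
 */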
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
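/*
 * A worked example (page frame numbers chosen only for illustration):
 * freeing 13 pages starting at page frame 6 releases blocks of 2, 8, 2
 * and 1 pages.  The first loop above peels off the leading blocks that
 * the start address's alignment permits (2 pages at frame 6, then 8 at
 * frame 8), and the second loop frees the 3-page remainder as 2 + 1.
 */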
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf(" | POOL %d", pind);
			db_printf("\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- -- ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf(" %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf(" | %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif