/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define	VM_ISADMA_BOUNDARY	16777216
#endif
#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
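 *
 * For example, on a platform that defines all three optional boundaries, the
 * expected ordering is VM_ISADMA_BOUNDARY (16 MB) < VM_LOWMEM_BOUNDARY <
 * VM_DMA32_BOUNDARY (4 GB).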
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
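 *
 * The resulting "vm.phys_free" sysctl output is, for each domain and free
 * list, a table with one row per order (largest first) and one column per
 * pool; each cell is the length ("lcnt") of the corresponding free queue.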
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
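 *
 * The "vm.phys_locality" output is a vm_ndomains x vm_ndomains matrix of the
 * values returned by vm_phys_mem_affinity(); an entry of -1 means that no
 * locality information is available for that pair of domains.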
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
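	 *
	 * For example, with only VM_FREELIST_LOWMEM and VM_FREELIST_DMA32
	 * configured, a segment spanning both boundaries is registered as
	 * three segments: [start, VM_LOWMEM_BOUNDARY),
	 * [VM_LOWMEM_BOUNDARY, VM_DMA32_BOUNDARY), and
	 * [VM_DMA32_BOUNDARY, end).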
	 */
	paddr = start;
#ifdef VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
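	 *
	 * Under VM_PHYSSEG_SPARSE the segments' pages are packed
	 * contiguously into vm_page_array, so first_page is computed from a
	 * running page count; under VM_PHYSSEG_DENSE it is derived directly
	 * from the segment's starting physical address.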
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.
 * Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
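 *
 * Here, and throughout this file, order "n" denotes a naturally aligned
 * block of 2^n pages; the largest block tracked by the allocator contains
 * 1 << (VM_NFREEORDER - 1) pages.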
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
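 *
 * The lookup is a linear scan of vm_phys_segs[]; the segment count is
 * bounded by VM_PHYSSEG_MAX, so this is cheap in practice.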
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
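		 *
		 * Supporting this case would require mixing pages taken from
		 * vm_page_array with separately allocated pages within a
		 * single segment, so the request is rejected instead.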
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only; there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
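 *
 * Coalescing note (informal, assuming 4 KB pages): the buddy of the block
 * that starts at physical address "pa" and has order "order" starts at
 * pa ^ (1 << (PAGE_SHIFT + order)).  For example, the order-0 buddy of the
 * page at 0x1000 is the page at 0x0000, and the order-1 buddy of the block
 * at 0x2000 is the block at 0x0000.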
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
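 *
 * Informally, a run [pa, pa + size) spans a multiple of "boundary" exactly
 * when pa and pa + size - 1 differ in a bit position at or above
 * log2(boundary), which is the test rounddown2(pa ^ (pa + size - 1),
 * boundary) != 0 used by vm_phys_alloc_seg_contig().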
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
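	 *
	 * For example (assuming 4 KB pages), if "m_set" is an order-2 block
	 * at 0x0000 and "m" is the page at 0x3000, the first iteration
	 * returns the order-1 half at 0x0000 and the second returns the
	 * order-0 half at 0x2000, leaving "m_set" equal to "m".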
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif