/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

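/*
 * Example (illustrative, not from the original sources): a caller that
 * prefers domain 2 but can use any domain with memory overlapping
 * [0, 4 GB] might call
 *
 *	domain = vm_phys_domain_match(2, 0, (vm_paddr_t)1 << 32);
 *
 * If domain 2 has no memory in that range, the lowest-numbered domain
 * that does is returned instead.
 */
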
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef	VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef	VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

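/*
 * Example (illustrative, not from the original sources): on a
 * configuration with both boundaries defined, VM_LOWMEM_BOUNDARY assumed
 * to be 16 MB and VM_DMA32_BOUNDARY at 4 GB, the call
 *
 *	vm_phys_add_seg(0x400000, 0x140000000);
 *
 * covering 4 MB to 5 GB creates three segments, [4 MB, 16 MB),
 * [16 MB, 4 GB), and [4 GB, 5 GB), so that each segment falls entirely
 * within a single free list.
 */
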
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef	VM_FREELIST_DMA32
		if (
#ifdef	VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;
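
	/*
	 * Example (illustrative, not from the original sources): if three
	 * free lists are all populated, the flags { 1, 1, 1 } become the
	 * running totals { 1, 2, 3 }, vm_nfreelists is 3, and the final
	 * indices are { 0, 1, 2 }.  An entry whose free list was never
	 * flagged may end up as -1, which vm_phys_alloc_freelist_pages()
	 * treats as an absent free list.
	 */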

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef	VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-term
 * fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

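/*
 * Example (illustrative, not from the original sources): with
 * npages == 11 (binary 1011), and the alignment asserted above, the loop
 * in vm_phys_enq_range() queues blocks of order 0, 1, and 3
 * (1 + 2 + 8 pages), consuming the set bits of npages from least to most
 * significant.
 */
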
/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

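/*
 * Example (illustrative, not from the original sources): with the free
 * queues of domain 0 locked, a caller needing a naturally aligned 64 KB
 * chunk (order 4 with 4 KB pages) from the default pool could use
 *
 *	m = vm_phys_alloc_pages(0, VM_FREEPOOL_DEFAULT, 4);
 *
 * A NULL return means that no free list in the domain could satisfy the
 * request.
 */
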
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

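/*
 * Example (illustrative, not from the original sources): a hypothetical
 * driver exposing a 2 MB write-combined device aperture at physical
 * address "base" might register it with
 *
 *	error = vm_phys_fictitious_reg_range(base, base + 2 * 1024 * 1024,
 *	    VM_MEMATTR_WRITE_COMBINING);
 *
 * and later tear it down with vm_phys_fictitious_unreg_range().
 */
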
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that extends before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

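/*
 * The buddy of a block is found by flipping the bit of its physical
 * address that corresponds to the block size; this is how
 * vm_phys_free_pages() below coalesces freed blocks.  Example
 * (illustrative, assuming 4 KB pages): the order 1 block at 0x6000 has
 * its buddy at 0x6000 ^ 0x2000 == 0x4000; if that buddy is also free at
 * order 1, the two merge into the order 2 block at 0x4000.
 */
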
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

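/*
 * Example (illustrative, not from the original sources): freeing
 * npages == 13 starting at page frame number 4 with vm_phys_free_contig()
 * releases an order 2 block at frame 4, an order 3 block at frame 8, and
 * an order 0 block at frame 16 (4 + 8 + 1 pages): the first loop above is
 * limited by the alignment of "m", and the second loop frees the
 * power-of-two pieces of the remainder.
 */
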
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

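/*
 * Example (illustrative, not from the original sources) of the extraction
 * performed by vm_phys_unfree_page() below: if page "m" lies within a
 * free order 3 block, that block is removed from its free list and the
 * halves not containing "m" are returned, one block each of order 2, 1,
 * and 0, leaving "m" itself as a single allocated page.
 */
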
/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

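/*
 * Example (illustrative, not from the original sources): with the free
 * queues of "domain" locked, a request for 16 contiguous pages below
 * 4 GB, aligned to 64 KB and not crossing a 1 MB boundary, might look
 * like
 *
 *	m = vm_phys_alloc_contig(domain, 16, 0, (vm_paddr_t)1 << 32,
 *	    64 * 1024, 1024 * 1024);
 */
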
/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif