/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
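
/*
 * Note on indexing (illustrative, not from the original source): a free
 * queue is selected as vm_phys_free_queues[domain][flind][pool][order].
 * For example, with 4KB pages, vm_phys_free_queues[0][0][VM_FREEPOOL_DEFAULT][9]
 * is domain 0's default-pool list of free order-9 (2MB) blocks on the first
 * free list.
 */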

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}
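
/*
 * Illustrative example (not from the original source): with affinity ranges
 * [0, 2GB) in domain 0 and [2GB, 6GB) in domain 1, a request with low = 1GB
 * and high = 3GB overlaps both ranges, so both domains are set in the mask.
 * If prefer is 1, domain 1 is returned; otherwise the lowest-numbered
 * matching domain, 0, is chosen.
 */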

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
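
/*
 * Illustrative note: mem_locality is a flattened vm_ndomains x vm_ndomains
 * matrix, typically populated from the platform's firmware locality
 * information.  On a four-domain system, for example, the distance from
 * domain 1 to domain 2 is stored at index 1 * 4 + 2 == 6.
 */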

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
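
/*
 * Illustrative example (assuming a platform that defines both
 * VM_FREELIST_LOWMEM and VM_FREELIST_DMA32): adding the segment [0, 8GB)
 * creates three segments, [0, VM_LOWMEM_BOUNDARY), [VM_LOWMEM_BOUNDARY, 4GB),
 * and [4GB, 8GB), so that no segment straddles a free list boundary.
 */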

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
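
/*
 * Illustrative example of the flind mapping (assuming the amd64 definitions
 * VM_FREELIST_DEFAULT == 0, VM_FREELIST_DMA32 == 1, VM_FREELIST_LOWMEM == 2):
 * if only the default and lowmem free lists are created, the entries of
 * vm_freelist_to_flind[] go from {1, 0, 1} to the running totals {1, 1, 2}
 * and finally to the indices {0, 0, 1}.  VM_FREELIST_DEFAULT and the absent
 * VM_FREELIST_DMA32 both map to flind 0 and VM_FREELIST_LOWMEM to flind 1,
 * so requests naming the DMA32 list are satisfied from the default list.
 */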

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, tail);
	}
}
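
/*
 * Illustrative example: satisfying an order-2 request from an order-5 block
 * returns the block's upper halves of orders 4, 3, and 2 to the free list,
 * leaving the caller with the lowest 4 pages (4 + 4 + 8 + 16 == 32).
 */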

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-term
 * fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
	u_int n;
	int order;

	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	do {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		KASSERT(order < VM_NFREEORDER,
		    ("vm_phys_enq_range: order %d is out of range", order));
		vm_freelist_add(fl, m, order, tail);
		n = 1 << order;
		m += n;
		npages -= n;
	} while (npages > 0);
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * Return excess pages to fl.  Its
					 * order [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - need, fl,
					    1);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail -
						    need, fl, 1);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}
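
/*
 * Illustrative usage sketch (not part of the original file): because
 * VM_FREELIST_DEFAULT is 0, the loop above tries the unconstrained default
 * free list first and falls back to more constrained lists (e.g. DMA32 or
 * LOWMEM, where defined) only when the default list is exhausted.  A caller
 * inside the VM system would typically hold the per-domain free queue lock,
 * roughly:
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, order);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 */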

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}
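
/*
 * Note (illustrative): this lookup walks the registered segments, so it
 * works for both dense and sparse vm_page_array layouts and returns NULL
 * for an address that falls outside every segment.
 */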

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}
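
/*
 * Illustrative usage sketch (not part of the original file): a driver that
 * needs vm_page structures for a page-aligned device memory window would
 * typically pair the two calls, e.g.
 *
 *	error = vm_phys_fictitious_reg_range(base, base + size,
 *	    VM_MEMATTR_DEFAULT);
 *	if (error != 0)
 *		return (error);
 *	...
 *	vm_phys_fictitious_unreg_range(base, base + size);
 *
 * where "base" and "size" are hypothetical, page-aligned values.
 */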

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg == NULL || seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}
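
/*
 * Illustrative example of the buddy computation above (4KB pages assumed):
 * freeing the order-0 page at 0x5000 yields a candidate buddy at
 * 0x5000 ^ 0x1000 == 0x4000.  If that page is free at order 0, the pair is
 * merged into an order-1 block at 0x4000, whose buddy is then
 * 0x4000 ^ 0x2000 == 0x6000, and so on until the candidate buddy lies
 * outside the segment or is not free at the matching order.
 */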

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
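
/*
 * Illustrative example: freeing 13 pages starting at a 16-page aligned
 * address first releases an order-3 block (8 pages), then an order-2 block
 * (4 pages), and finally a single order-0 page, since 13 == 8 + 4 + 1.
 */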

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}
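
/*
 * Illustrative example of the "alignment" and "boundary" parameters used
 * above and in vm_phys_alloc_contig() below: with 4KB pages, an alignment of
 * 2MB requires the run to start on a 2MB-aligned physical address, while a
 * boundary of 4MB additionally forbids the run from crossing any 4MB-aligned
 * physical address.
 */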

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
	/* Compute the queue that is the best fit for npages. */
	order = flsl(npages - 1);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					if (pa_end < pa)
						continue;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, oind);
		if (m->pool != VM_FREEPOOL_DEFAULT)
			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << oind);
	if (npages < npages_end) {
		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
	}
	return (m_ret);
}
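
/*
 * Illustrative note on the boundary test above: pa ^ (pa_end - 1) has a bit
 * set at or above log2(boundary) exactly when the first and last bytes of
 * the candidate run lie in different boundary-aligned windows, so
 * rounddown2(pa ^ (pa_end - 1), boundary) is nonzero only for runs that
 * would cross a multiple of "boundary".  For example, with a 4MB boundary,
 * a 1MB run starting at 0x3f80000 crosses the 4MB line at 0x4000000 and
 * fails the test, while the same run starting at 0x4000000 passes.  A
 * boundary of zero rounds everything down to zero and never rejects a run.
 */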

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif