/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <vm/vm_domain.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef VM_NUMA_ALLOC
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
        RB_ENTRY(vm_phys_fictitious_seg) node;
        /* Memory region data */
        vm_paddr_t      start;
        vm_paddr_t      end;
        vm_page_t       first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists;

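/*
 * Free queue layout: vm_phys_free_queues[dom][flind][pool][order] heads
 * the buddy queue of free blocks of 2^order contiguous pages belonging
 * to pool "pool" on free list "flind" in memory domain "dom".  For
 * example, with 4KB pages (an assumption; PAGE_SHIFT is
 * machine-dependent), the order-3 queue chains free 32KB blocks.
 */
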
/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define VM_ISADMA_BOUNDARY      16777216
#endif
#ifdef VM_FREELIST_DMA32
#define VM_DMA32_BOUNDARY       ((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef VM_NUMA_ALLOC
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

/*
 * Default to first-touch + round-robin.
 */
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
    MTX_DEF);
#ifdef VM_NUMA_ALLOC
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation */
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
        char policy_name[32];
        int error;

        mtx_lock(&vm_default_policy_mtx);

        /* Map policy to output string */
        switch (vm_default_policy.p.policy) {
        case VM_POLICY_FIRST_TOUCH:
                strcpy(policy_name, "first-touch");
                break;
        case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
                strcpy(policy_name, "first-touch-rr");
                break;
        case VM_POLICY_ROUND_ROBIN:
        default:
                strcpy(policy_name, "rr");
                break;
        }
        mtx_unlock(&vm_default_policy_mtx);

        error = sysctl_handle_string(oidp, &policy_name[0],
            sizeof(policy_name), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vm_default_policy_mtx);
        /* Set: match on the subset of policies that make sense as a default */
        if (strcmp("first-touch-rr", policy_name) == 0) {
                vm_domain_policy_set(&vm_default_policy,
                    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
        } else if (strcmp("first-touch", policy_name) == 0) {
                vm_domain_policy_set(&vm_default_policy,
                    VM_POLICY_FIRST_TOUCH, 0);
        } else if (strcmp("rr", policy_name) == 0) {
                vm_domain_policy_set(&vm_default_policy,
                    VM_POLICY_ROUND_ROBIN, 0);
        } else {
                error = EINVAL;
                goto finish;
        }

        error = 0;
finish:
        mtx_unlock(&vm_default_policy_mtx);
        return (error);
}

SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");

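/*
 * Example (from userland; strings other than "rr", "first-touch" and
 * "first-touch-rr" are rejected with EINVAL):
 *
 *	# sysctl vm.default_policy=first-touch-rr
 */
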
/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

        KASSERT(range->start != 0 && range->end != 0,
            ("Invalid range passed on search for vm_fictitious page"));
        if (p->start >= range->end)
                return (1);
        if (p->start < range->start)
                return (-1);

        return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

        /* Check if this is a search for a page */
        if (p1->end == 0)
                return (vm_phys_fictitious_in_range(p1, p2));

        KASSERT(p2->end != 0,
            ("Invalid range passed as second parameter to vm fictitious comparison"));

        /* Searching to add a new range */
        if (p1->end <= p2->start)
                return (-1);
        if (p1->start >= p2->end)
                return (1);

        panic("Trying to add overlapping vm fictitious ranges:\n"
            "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
            (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

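/*
 * By convention, a comparison key whose "end" field is zero denotes a
 * single-address lookup rather than a range insertion; see
 * vm_phys_fictitious_to_vm_page(), which builds its search key as:
 *
 *	tmp.start = pa;
 *	tmp.end = 0;
 */
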
#ifdef notyet
static __inline int
vm_rr_selectdomain(void)
{
#ifdef VM_NUMA_ALLOC
        struct thread *td;

        td = curthread;

        td->td_dom_rr_idx++;
        td->td_dom_rr_idx %= vm_ndomains;
        return (td->td_dom_rr_idx);
#else
        return (0);
#endif
}
#endif /* notyet */

/*
 * Initialise a VM domain iterator.
 *
 * Check the thread policy, then the proc policy,
 * then default to the system policy.
 *
 * Later on the various layers will have this logic
 * plumbed into them and the phys code will be explicitly
 * handed a VM domain policy to use.
 */
static void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#ifdef VM_NUMA_ALLOC
        struct vm_domain_policy lcl;
#endif

        vm_domain_iterator_init(vi);

#ifdef VM_NUMA_ALLOC
        /* Copy out the thread policy */
        vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
        if (lcl.p.policy != VM_POLICY_NONE) {
                /* Thread policy is present; use it */
                vm_domain_iterator_set_policy(vi, &lcl);
                return;
        }

        vm_domain_policy_localcopy(&lcl,
            &curthread->td_proc->p_vm_dom_policy);
        if (lcl.p.policy != VM_POLICY_NONE) {
                /* Process policy is present; use it */
                vm_domain_iterator_set_policy(vi, &lcl);
                return;
        }
#endif
        /* Use system default policy */
        vm_domain_iterator_set_policy(vi, &vm_default_policy);
}

static void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

        vm_domain_iterator_cleanup(vi);
}

boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
        struct vm_phys_seg *s;
        int idx;

        while ((idx = ffsl(mask)) != 0) {
                idx--;  /* ffsl counts from 1 */
                mask &= ~(1UL << idx);
                s = &vm_phys_segs[idx];
                if (low < s->end && high > s->start)
                        return (TRUE);
        }
        return (FALSE);
}

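/*
 * Example: a mask of 0x5 makes the loop in vm_phys_domain_intersects()
 * above test vm_phys_segs[0] and vm_phys_segs[2], using the usual
 * half-open interval overlap check between [low, high) and
 * [s->start, s->end).
 */
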
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_freelist *fl;
        int dom, error, flind, oind, pind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
        for (dom = 0; dom < vm_ndomains; dom++) {
                sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "  |  POOL %d", pind);
                        sbuf_printf(&sbuf, "\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "-- --      ");
                        sbuf_printf(&sbuf, "--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        sbuf_printf(&sbuf, "  |  %6d",
                                            fl[oind].lcnt);
                                }
                                sbuf_printf(&sbuf, "\n");
                        }
                }
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_phys_seg *seg;
        int error, segind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
                seg = &vm_phys_segs[segind];
                sbuf_printf(&sbuf, "start:     %#jx\n",
                    (uintmax_t)seg->start);
                sbuf_printf(&sbuf, "end:       %#jx\n",
                    (uintmax_t)seg->end);
                sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
                sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef VM_NUMA_ALLOC
        if (mem_locality == NULL)
                return (-1);
        if (f >= vm_ndomains || t >= vm_ndomains)
                return (-1);
        return (mem_locality[f * vm_ndomains + t]);
#else
        return (-1);
#endif
}

#ifdef VM_NUMA_ALLOC
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        int error, i, j;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

        sbuf_printf(&sbuf, "\n");

        for (i = 0; i < vm_ndomains; i++) {
                sbuf_printf(&sbuf, "%d: ", i);
                for (j = 0; j < vm_ndomains; j++) {
                        sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
                }
                sbuf_printf(&sbuf, "\n");
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

        m->order = order;
        if (tail)
                TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
        else
                TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
        fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

        TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
        fl[order].lcnt--;
        m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
        struct vm_phys_seg *seg;

        KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
            ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
        KASSERT(domain < vm_ndomains,
            ("vm_phys_create_seg: invalid domain provided"));
        seg = &vm_phys_segs[vm_phys_nsegs++];
        while (seg > vm_phys_segs && (seg - 1)->start >= end) {
                *seg = *(seg - 1);
                seg--;
        }
        seg->start = start;
        seg->end = end;
        seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef VM_NUMA_ALLOC
        int i;

        if (mem_affinity == NULL) {
                _vm_phys_create_seg(start, end, 0);
                return;
        }

        for (i = 0;; i++) {
                if (mem_affinity[i].end == 0)
                        panic("Reached end of affinity info");
                if (mem_affinity[i].end <= start)
                        continue;
                if (mem_affinity[i].start > start)
                        panic("No affinity info for start %jx",
                            (uintmax_t)start);
                if (mem_affinity[i].end >= end) {
                        _vm_phys_create_seg(start, end,
                            mem_affinity[i].domain);
                        break;
                }
                _vm_phys_create_seg(start, mem_affinity[i].end,
                    mem_affinity[i].domain);
                start = mem_affinity[i].end;
        }
#else
        _vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
        vm_paddr_t paddr;

        KASSERT((start & PAGE_MASK) == 0,
            ("vm_phys_define_seg: start is not page aligned"));
        KASSERT((end & PAGE_MASK) == 0,
            ("vm_phys_define_seg: end is not page aligned"));

        /*
         * Split the physical memory segment if it spans two or more free
         * list boundaries.
         */
        paddr = start;
#ifdef VM_FREELIST_ISADMA
        if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
                vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
                paddr = VM_ISADMA_BOUNDARY;
        }
#endif
#ifdef VM_FREELIST_LOWMEM
        if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
                vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
                paddr = VM_LOWMEM_BOUNDARY;
        }
#endif
#ifdef VM_FREELIST_DMA32
        if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
                vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
                paddr = VM_DMA32_BOUNDARY;
        }
#endif
        vm_phys_create_seg(paddr, end);
}

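/*
 * Example: on a platform that defines both VM_FREELIST_LOWMEM and
 * VM_FREELIST_DMA32 (but not VM_FREELIST_ISADMA), and assuming
 * VM_LOWMEM_BOUNDARY lies between 2MB and 4GB, adding [2MB, 5GB) with
 * vm_phys_add_seg() above yields three segments:
 * [2MB, VM_LOWMEM_BOUNDARY), [VM_LOWMEM_BOUNDARY, 4GB) and [4GB, 5GB).
 * The CTASSERTs near the top of this file ensure the boundaries are
 * tested in increasing order, so each split point is crossed at most
 * once.
 */
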
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        u_long npages;
        int dom, flind, freelist, oind, pind, segind;

        /*
         * Compute the number of free lists, and generate the mapping from the
         * manifest constants VM_FREELIST_* to the free list indices.
         *
         * Initially, the entries of vm_freelist_to_flind[] are set to either
         * 0 or 1 to indicate which free lists should be created.
         */
        npages = 0;
        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
                seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
                if (seg->end <= VM_ISADMA_BOUNDARY)
                        vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
                else
#endif
#ifdef VM_FREELIST_LOWMEM
                if (seg->end <= VM_LOWMEM_BOUNDARY)
                        vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
                else
#endif
#ifdef VM_FREELIST_DMA32
                if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
                    /*
                     * Create the DMA32 free list only if the amount of
                     * physical memory above physical address 4G exceeds the
                     * given threshold.
                     */
                    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
                    seg->end <= VM_DMA32_BOUNDARY)
                        vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
                else
#endif
                {
                        npages += atop(seg->end - seg->start);
                        vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
                }
        }
        /* Change each entry into a running total of the free lists. */
        for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
                vm_freelist_to_flind[freelist] +=
                    vm_freelist_to_flind[freelist - 1];
        }
        vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
        KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
        /* Change each entry into a free list index. */
        for (freelist = 0; freelist < VM_NFREELIST; freelist++)
                vm_freelist_to_flind[freelist]--;

        /*
         * Initialize the first_page and free_queues fields of each physical
         * memory segment.
         */
#ifdef VM_PHYSSEG_SPARSE
        npages = 0;
#endif
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
                seg->first_page = &vm_page_array[npages];
                npages += atop(seg->end - seg->start);
#else
                seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
                if (seg->end <= VM_ISADMA_BOUNDARY) {
                        flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: ISADMA flind < 0"));
                } else
#endif
#ifdef VM_FREELIST_LOWMEM
                if (seg->end <= VM_LOWMEM_BOUNDARY) {
                        flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: LOWMEM flind < 0"));
                } else
#endif
#ifdef VM_FREELIST_DMA32
                if (seg->end <= VM_DMA32_BOUNDARY) {
                        flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: DMA32 flind < 0"));
                } else
#endif
                {
                        flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: DEFAULT flind < 0"));
                }
                seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
        }

        /*
         * Initialize the free queues.
         */
        for (dom = 0; dom < vm_ndomains; dom++) {
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = vm_phys_free_queues[dom][flind][pind];
                                for (oind = 0; oind < VM_NFREEORDER; oind++)
                                        TAILQ_INIT(&fl[oind].pl);
                        }
                }
        }

        rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

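/*
 * Worked example of the flind computation in vm_phys_init() above: if
 * the 0/1 flags in vm_freelist_to_flind[] end up as (1, 0, 1), the
 * running totals become (1, 1, 2), vm_nfreelists is set to 2, and the
 * final decrement yields (0, 0, 1): the two populated free lists receive
 * the contiguous indices 0 and 1.
 */
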
/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
        vm_page_t m_buddy;

        while (oind > order) {
                oind--;
                m_buddy = &m[1 << oind];
                KASSERT(m_buddy->order == VM_NFREEORDER,
                    ("vm_phys_split_pages: page %p has unexpected order %d",
                    m_buddy, m_buddy->order));
                vm_freelist_add(fl, m_buddy, oind, 0);
        }
}

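/*
 * Example: splitting an order-3 block "m" down to a requested order of 1
 * in vm_phys_split_pages() above returns m[4] to the order-2 queue and
 * m[2] to the order-1 queue, leaving "m" itself as the order-1 block
 * handed to the caller.
 */
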
/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
        vm_page_t m;
        int domain, flind;
        struct vm_domain_iterator vi;

        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_pages: order %d is out of range", order));

        vm_policy_iterator_init(&vi);

        while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        m = vm_phys_alloc_domain_pages(domain, flind, pool,
                            order);
                        if (m != NULL)
                                return (m);
                }
        }

        vm_policy_iterator_finish(&vi);
        return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
{
        vm_page_t m;
        struct vm_domain_iterator vi;
        int domain;

        KASSERT(freelist < VM_NFREELIST,
            ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
            freelist));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

        vm_policy_iterator_init(&vi);

        while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
                m = vm_phys_alloc_domain_pages(domain,
                    vm_freelist_to_flind[freelist], pool, order);
                if (m != NULL)
                        return (m);
        }

        vm_policy_iterator_finish(&vi);
        return (NULL);
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
        struct vm_freelist *fl;
        struct vm_freelist *alt;
        int oind, pind;
        vm_page_t m;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        fl = &vm_phys_free_queues[domain][flind][pool][0];
        for (oind = order; oind < VM_NFREEORDER; oind++) {
                m = TAILQ_FIRST(&fl[oind].pl);
                if (m != NULL) {
                        vm_freelist_rem(fl, m, oind);
                        vm_phys_split_pages(m, oind, fl, order);
                        return (m);
                }
        }

        /*
         * The given pool was empty.  Find the largest
         * contiguous, power-of-two-sized set of pages in any
         * pool.  Transfer these pages to the given pool, and
         * use them to satisfy the allocation.
         */
        for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        alt = &vm_phys_free_queues[domain][flind][pind][0];
                        m = TAILQ_FIRST(&alt[oind].pl);
                        if (m != NULL) {
                                vm_freelist_rem(alt, m, oind);
                                vm_phys_set_pool(pool, m, oind);
                                vm_phys_split_pages(m, oind, fl, order);
                                return (m);
                        }
                }
        }
        return (NULL);
}

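/*
 * Usage sketch for the allocator entry points above (an illustration,
 * not an existing caller: normal kernel code goes through
 * vm_page_alloc(), which also maintains the free page counts):
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * A NULL return means that no domain permitted by the current policy
 * could satisfy the request.
 */
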
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (&seg->first_page[atop(pa - seg->start)]);
        }
        return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_fictitious_seg tmp, *seg;
        vm_page_t m;

        m = NULL;
        tmp.start = pa;
        tmp.end = 0;

        rw_rlock(&vm_phys_fictitious_reg_lock);
        seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
        rw_runlock(&vm_phys_fictitious_reg_lock);
        if (seg == NULL)
                return (NULL);

        m = &seg->first_page[atop(pa - seg->start)];
        KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

        return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
        long i;

        bzero(range, page_count * sizeof(*range));
        for (i = 0; i < page_count; i++) {
                vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
                range[i].oflags &= ~VPO_UNMANAGED;
                range[i].busy_lock = VPB_UNBUSIED;
        }
}

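/*
 * Under VM_PHYSSEG_DENSE, vm_phys_fictitious_reg_range() below reuses
 * vm_page structures from vm_page_array wherever the requested range
 * overlaps it.  A range that lies entirely inside vm_page_array needs no
 * RB-tree entry at all; for a range that partially overlaps, only the
 * portion outside the array is allocated from M_FICT_PAGES and
 * registered in the tree; and a range that extends beyond both ends of
 * vm_page_array is rejected with EINVAL.
 */
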
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        long page_count;
#ifdef VM_PHYSSEG_DENSE
        long pi, pe;
        long dpage_count;
#endif

        KASSERT(start < end,
            ("Start of segment isn't less than end (start: %jx end: %jx)",
            (uintmax_t)start, (uintmax_t)end));

        page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        pe = atop(end);
        if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
                fp = &vm_page_array[pi - first_page];
                if ((pe - first_page) > vm_page_array_size) {
                        /*
                         * We have a segment that starts inside
                         * of vm_page_array, but ends outside of it.
                         *
                         * Use vm_page_array pages for those that are
                         * inside of the vm_page_array range, and
                         * allocate the remaining ones.
                         */
                        dpage_count = vm_page_array_size - (pi - first_page);
                        vm_phys_fictitious_init_range(fp, start, dpage_count,
                            memattr);
                        page_count -= dpage_count;
                        start += ptoa(dpage_count);
                        goto alloc;
                }
                /*
                 * We can allocate the full range from vm_page_array,
                 * so there's no need to register the range in the tree.
                 */
                vm_phys_fictitious_init_range(fp, start, page_count, memattr);
                return (0);
        } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
                /*
                 * We have a segment that ends inside of vm_page_array,
                 * but starts outside of it.
                 */
                fp = &vm_page_array[0];
                dpage_count = pe - first_page;
                vm_phys_fictitious_init_range(fp, ptoa(first_page),
                    dpage_count, memattr);
                end -= ptoa(dpage_count);
                page_count -= dpage_count;
                goto alloc;
        } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
                /*
                 * Trying to register a fictitious range that expands before
                 * and after vm_page_array.
                 */
                return (EINVAL);
        } else {
alloc:
#endif
                fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
                    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
        }
#endif
        vm_phys_fictitious_init_range(fp, start, page_count, memattr);

        seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
        seg->start = start;
        seg->end = end;
        seg->first_page = fp;

        rw_wlock(&vm_phys_fictitious_reg_lock);
        RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
        rw_wunlock(&vm_phys_fictitious_reg_lock);

        return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
        struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
        long pi, pe;
#endif

        KASSERT(start < end,
            ("Start of segment isn't less than end (start: %jx end: %jx)",
            (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        pe = atop(end);
        if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
                if ((pe - first_page) <= vm_page_array_size) {
                        /*
                         * This segment was allocated using vm_page_array
                         * only, there's nothing to do since those pages
                         * were never added to the tree.
                         */
                        return;
                }
                /*
                 * We have a segment that starts inside
                 * of vm_page_array, but ends outside of it.
                 *
                 * Calculate how many pages were added to the
                 * tree and free them.
                 */
                start = ptoa(first_page + vm_page_array_size);
        } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
                /*
                 * We have a segment that ends inside of vm_page_array,
                 * but starts outside of it.
                 */
                end = ptoa(first_page);
        } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
                /* Since it's not possible to register such a range, panic. */
                panic(
                    "Unregistering not registered fictitious range [%#jx:%#jx]",
                    (uintmax_t)start, (uintmax_t)end);
        }
#endif
        tmp.start = start;
        tmp.end = 0;

        rw_wlock(&vm_phys_fictitious_reg_lock);
        seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
        /* A failed lookup (NULL) also means the range was never registered. */
        if (seg == NULL || seg->start != start || seg->end != end) {
                rw_wunlock(&vm_phys_fictitious_reg_lock);
                panic(
                    "Unregistering not registered fictitious range [%#jx:%#jx]",
                    (uintmax_t)start, (uintmax_t)end);
        }
        RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
        rw_wunlock(&vm_phys_fictitious_reg_lock);
        free(seg->first_page, M_FICT_PAGES);
        free(seg, M_FICT_PAGES);
}

/*
 * Find the segment containing the given physical address.
 */
int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (segind);
        }
        panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
            (uintmax_t)pa);
}

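/*
 * Buddy arithmetic used by vm_phys_free_pages() below: the buddy of a
 * 2^order-page block at physical address "pa" is found by flipping bit
 * (PAGE_SHIFT + order) of "pa".  For example, with 4KB pages (an
 * assumption; PAGE_SHIFT is machine-dependent), the order-0 buddy of the
 * page at 0x3000 is at 0x2000, and once the two merge, the order-1 buddy
 * of the block at 0x2000 is at 0x0000.
 */
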
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa;
        vm_page_t m_buddy;

        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_free_pages: page %p has unexpected order %d",
            m, m->order));
        KASSERT(m->pool < VM_NFREEPOOL,
            ("vm_phys_free_pages: page %p has unexpected pool %d",
            m, m->pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_free_pages: order %d is out of range", order));
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        seg = &vm_phys_segs[m->segind];
        if (order < VM_NFREEORDER - 1) {
                pa = VM_PAGE_TO_PHYS(m);
                do {
                        pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
                        if (pa < seg->start || pa >= seg->end)
                                break;
                        m_buddy = &seg->first_page[atop(pa - seg->start)];
                        if (m_buddy->order != order)
                                break;
                        fl = (*seg->free_queues)[m_buddy->pool];
                        vm_freelist_rem(fl, m_buddy, order);
                        if (m_buddy->pool != m->pool)
                                vm_phys_set_pool(m->pool, m_buddy, order);
                        order++;
                        pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
                        m = &seg->first_page[atop(pa - seg->start)];
                } while (order < VM_NFREEORDER - 1);
        }
        fl = (*seg->free_queues)[m->pool];
        vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
        u_int n;
        int order;

        /*
         * Avoid unnecessary coalescing by freeing the pages in the largest
         * possible power-of-two-sized subsets.
         */
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        for (;; npages -= n) {
                /*
                 * Unsigned "min" is used here so that "order" is assigned
                 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
                 * or the low-order bits of its physical address are zero
                 * because the size of a physical address exceeds the size of
                 * a long.
                 */
                order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
                    VM_NFREEORDER - 1);
                n = 1 << order;
                if (npages < n)
                        break;
                vm_phys_free_pages(m, order);
                m += n;
        }
        /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
        for (; npages > 0; npages -= n) {
                order = flsl(npages) - 1;
                n = 1 << order;
                vm_phys_free_pages(m, order);
                m += n;
        }
}

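/*
 * Example: freeing a run of 11 pages whose first page is exactly order-6
 * aligned (and assuming VM_NFREEORDER > 6, so min() does not clamp)
 * frees nothing in the first loop of vm_phys_free_contig() above, since
 * 1 << 6 = 64 > 11; the second loop then frees blocks of order 3, 1 and
 * 0 (8 + 2 + 1 pages).
 */
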
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
        vm_paddr_t pa_end;
        vm_page_t m_end, m_run, m_start;
        struct vm_phys_seg *seg;
        int segind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        if (low >= high)
                return (NULL);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (seg->start >= high)
                        break;
                if (low >= seg->end)
                        continue;
                if (low <= seg->start)
                        m_start = seg->first_page;
                else
                        m_start = &seg->first_page[atop(low - seg->start)];
                if (high < seg->end)
                        pa_end = high;
                else
                        pa_end = seg->end;
                if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
                        continue;
                m_end = &seg->first_page[atop(pa_end - seg->start)];
                m_run = vm_page_scan_contig(npages, m_start, m_end,
                    alignment, boundary, options);
                if (m_run != NULL)
                        return (m_run);
        }
        return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
        vm_page_t m_tmp;

        for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
                m_tmp->pool = pool;
}

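/*
 * vm_phys_unfree_page() below first locates the free block containing a
 * given page by masking ever more low-order address bits: only the first
 * page of a free block has its "order" field set to something other than
 * VM_NFREEORDER, so walking up the orders finds the block head.  The
 * block is then split back down, returning the half not containing "m"
 * to the free lists at each step.
 */
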
/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_half;
        vm_page_t m_set, m_tmp;
        int order;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

        /*
         * First, find the contiguous, power of two-sized set of free
         * physical pages containing the given physical page "m" and
         * assign it to "m_set".
         */
        seg = &vm_phys_segs[m->segind];
        for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
            order < VM_NFREEORDER - 1; ) {
                order++;
                pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
                if (pa >= seg->start)
                        m_set = &seg->first_page[atop(pa - seg->start)];
                else
                        return (FALSE);
        }
        if (m_set->order < order)
                return (FALSE);
        if (m_set->order == VM_NFREEORDER)
                return (FALSE);
        KASSERT(m_set->order < VM_NFREEORDER,
            ("vm_phys_unfree_page: page %p has unexpected order %d",
            m_set, m_set->order));

        /*
         * Next, remove "m_set" from the free lists.  Finally, extract
         * "m" from "m_set" using an iterative algorithm: While "m_set"
         * is larger than a page, shrink "m_set" by returning the half
         * of "m_set" that does not contain "m" to the free lists.
         */
        fl = (*seg->free_queues)[m_set->pool];
        order = m_set->order;
        vm_freelist_rem(fl, m_set, order);
        while (order > 0) {
                order--;
                pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
                if (m->phys_addr < pa_half)
                        m_tmp = &seg->first_page[atop(pa_half - seg->start)];
                else {
                        m_tmp = m_set;
                        m_set = &seg->first_page[atop(pa_half - seg->start)];
                }
                vm_freelist_add(fl, m_tmp, order, 0);
        }
        KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
        return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
        vm_paddr_t pa_end, pa_start;
        vm_page_t m_run;
        struct vm_domain_iterator vi;
        struct vm_phys_seg *seg;
        int domain, segind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        if (low >= high)
                return (NULL);
        vm_policy_iterator_init(&vi);
restartdom:
        if (vm_domain_iterator_run(&vi, &domain) != 0) {
                vm_policy_iterator_finish(&vi);
                return (NULL);
        }
        m_run = NULL;
        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
                seg = &vm_phys_segs[segind];
                if (seg->start >= high || seg->domain != domain)
                        continue;
                if (low >= seg->end)
                        break;
                if (low <= seg->start)
                        pa_start = seg->start;
                else
                        pa_start = low;
                if (high < seg->end)
                        pa_end = high;
                else
                        pa_end = seg->end;
                if (pa_end - pa_start < ptoa(npages))
                        continue;
                m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
                    alignment, boundary);
                if (m_run != NULL)
                        break;
        }
        if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
                goto restartdom;
        vm_policy_iterator_finish(&vi);
        return (m_run);
}

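/*
 * In vm_phys_alloc_seg_contig() below, the expression
 *
 *	rounddown2(pa ^ (pa_end - 1), boundary) == 0
 *
 * checks that the first and last byte of a candidate run agree in every
 * address bit at or above log2(boundary), i.e. that the run does not
 * cross a "boundary"-aligned address.  When "boundary" is zero,
 * rounddown2() masks with ~((vm_paddr_t)0 - 1), which is zero, so the
 * test passes vacuously.
 */
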
/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
        struct vm_freelist *fl;
        vm_paddr_t pa, pa_end, size;
        vm_page_t m, m_ret;
        u_long npages_end;
        int oind, order, pind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        /* Compute the queue that is the best fit for npages. */
        for (order = 0; (1 << order) < npages; order++);
        /* Search for a run satisfying the specified conditions. */
        size = npages << PAGE_SHIFT;
        for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
            oind++) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        fl = (*seg->free_queues)[pind];
                        TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
                                /*
                                 * Is the size of this allocation request
                                 * larger than the largest block size?
                                 */
                                if (order >= VM_NFREEORDER) {
                                        /*
                                         * Determine if a sufficient number of
                                         * subsequent blocks to satisfy the
                                         * allocation request are free.
                                         */
                                        pa = VM_PAGE_TO_PHYS(m_ret);
                                        pa_end = pa + size;
                                        for (;;) {
                                                pa += 1 << (PAGE_SHIFT +
                                                    VM_NFREEORDER - 1);
                                                if (pa >= pa_end ||
                                                    pa < seg->start ||
                                                    pa >= seg->end)
                                                        break;
                                                m = &seg->first_page[atop(pa -
                                                    seg->start)];
                                                if (m->order != VM_NFREEORDER -
                                                    1)
                                                        break;
                                        }
                                        /* If not, go to the next block. */
                                        if (pa < pa_end)
                                                continue;
                                }

                                /*
                                 * Determine if the blocks are within the
                                 * given range, satisfy the given alignment,
                                 * and do not cross the given boundary.
                                 */
                                pa = VM_PAGE_TO_PHYS(m_ret);
                                pa_end = pa + size;
                                if (pa >= low && pa_end <= high &&
                                    (pa & (alignment - 1)) == 0 &&
                                    rounddown2(pa ^ (pa_end - 1),
                                    boundary) == 0)
                                        goto done;
                        }
                }
        }
        return (NULL);
done:
        for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
                fl = (*seg->free_queues)[m->pool];
                vm_freelist_rem(fl, m, m->order);
        }
        if (m_ret->pool != VM_FREEPOOL_DEFAULT)
                vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
        fl = (*seg->free_queues)[m_ret->pool];
        vm_phys_split_pages(m_ret, oind, fl, order);
        /* Return excess pages to the free lists. */
        npages_end = roundup2(npages, 1 << imin(oind, order));
        if (npages < npages_end)
                vm_phys_free_contig(&m_ret[npages], npages_end - npages);
        return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
        struct vm_freelist *fl;
        int flind, oind, pind, dom;

        for (dom = 0; dom < vm_ndomains; dom++) {
                db_printf("DOMAIN: %d\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        db_printf("FREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("  |  POOL %d", pind);
                        db_printf("\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("-- --      ");
                        db_printf("--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                db_printf("  %2.2d (%6.6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        db_printf("  |  %6.6d", fl[oind].lcnt);
                                }
                                db_printf("\n");
                        }
                        db_printf("\n");
                }
                db_printf("\n");
        }
}
#endif