/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <vm/vm_domain.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef VM_NUMA_ALLOC
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define	VM_ISADMA_BOUNDARY	16777216
#endif
#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef VM_NUMA_ALLOC
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");
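
/*
 * Illustrative note (not part of the original source): the buddy queues
 * declared above are indexed as
 *
 *	vm_phys_free_queues[domain][flind][pool][order]
 *
 * so, for example, a free order-3 chunk (8 pages) of the default pool on a
 * single-domain system lives on the list
 *
 *	vm_phys_free_queues[0][vm_freelist_to_flind[VM_FREELIST_DEFAULT]]
 *	    [VM_FREEPOOL_DEFAULT][3].pl
 *
 * vm_phys_init() below fills in vm_freelist_to_flind[] and points each
 * segment's free_queues at the [domain][flind] slice of this array.
 */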

/*
 * Default to first-touch + round-robin.
 */
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
    MTX_DEF);
#ifdef VM_NUMA_ALLOC
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation */
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
	char policy_name[32];
	int error;

	mtx_lock(&vm_default_policy_mtx);

	/* Map policy to output string */
	switch (vm_default_policy.p.policy) {
	case VM_POLICY_FIRST_TOUCH:
		strcpy(policy_name, "first-touch");
		break;
	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
		strcpy(policy_name, "first-touch-rr");
		break;
	case VM_POLICY_ROUND_ROBIN:
	default:
		strcpy(policy_name, "rr");
		break;
	}
	mtx_unlock(&vm_default_policy_mtx);

	error = sysctl_handle_string(oidp, &policy_name[0],
	    sizeof(policy_name), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vm_default_policy_mtx);
	/* Set: match on the subset of policies that make sense as a default */
	if (strcmp("first-touch-rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
	} else if (strcmp("first-touch", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH, 0);
	} else if (strcmp("rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_ROUND_ROBIN, 0);
	} else {
		error = EINVAL;
		goto finish;
	}

	error = 0;
finish:
	mtx_unlock(&vm_default_policy_mtx);
	return (error);
}

SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");
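
/*
 * Usage note (illustrative, not part of the original source): the handler
 * above is reached through the regular sysctl(8) interface, e.g.
 *
 *	# sysctl vm.default_policy
 *	vm.default_policy: rr
 *	# sysctl vm.default_policy=first-touch
 *
 * Strings other than "rr", "first-touch" and "first-touch-rr" are rejected
 * with EINVAL.
 */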

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

#ifdef notyet
static __inline int
vm_rr_selectdomain(void)
{
#ifdef VM_NUMA_ALLOC
	struct thread *td;

	td = curthread;

	td->td_dom_rr_idx++;
	td->td_dom_rr_idx %= vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
#endif /* notyet */

/*
 * Initialise a VM domain iterator.
 *
 * Check the thread policy, then the proc policy,
 * then default to the system policy.
 *
 * Later on the various layers will have this logic
 * plumbed into them and the phys code will be explicitly
 * handed a VM domain policy to use.
 */
static void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#ifdef VM_NUMA_ALLOC
	struct vm_domain_policy lcl;
#endif

	vm_domain_iterator_init(vi);

#ifdef VM_NUMA_ALLOC
	/* Copy out the thread policy */
	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Thread policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}

	vm_domain_policy_localcopy(&lcl,
	    &curthread->td_proc->p_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Process policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}
#endif
	/* Use system default policy */
	vm_domain_iterator_set_policy(vi, &vm_default_policy);
}

static void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

	vm_domain_iterator_cleanup(vi);
}

boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
	struct vm_phys_seg *s;
	int idx;

	while ((idx = ffsl(mask)) != 0) {
		idx--;	/* ffsl counts from 1 */
		mask &= ~(1UL << idx);
		s = &vm_phys_segs[idx];
		if (low < s->end && high > s->start)
			return (TRUE);
	}
	return (FALSE);
}
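
/*
 * Usage sketch (illustrative, not part of the original source): the mask
 * passed to vm_phys_domain_intersects() is a bitmask of vm_phys_segs[]
 * indices, such as a domain's vmd_segs.  A caller can therefore test whether
 * a domain owns any memory within a physical window, e.g.
 *
 *	if (vm_phys_domain_intersects(vmd->vmd_segs, 0, (vm_paddr_t)1 << 32))
 *		... some of the domain's segments lie below 4GB ...
 *
 * A zero mask makes ffsl() return 0 immediately, so the result is FALSE.
 */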

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, " | POOL %d", pind);
			sbuf_printf(&sbuf, "\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- -- ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, " %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, " | %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef VM_NUMA_ALLOC
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
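
/*
 * Illustrative example (not part of the original source): mem_locality is a
 * flat vm_ndomains x vm_ndomains matrix indexed as [from * vm_ndomains + to].
 * With two domains and SLIT-style distances of 10 (local) and 21 (remote),
 * the table would be
 *
 *	mem_locality = { 10, 21, 21, 10 };
 *
 * so vm_phys_mem_affinity(0, 1) returns 21 and vm_phys_mem_affinity(1, 1)
 * returns 10; vm.phys_locality below prints this matrix one row per domain.
 */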

#ifdef VM_NUMA_ALLOC
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef VM_NUMA_ALLOC
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
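
	/*
	 * Worked example (illustrative, not part of the original source):
	 * on a platform that defines VM_LOWMEM_BOUNDARY (say 16MB for the
	 * sake of the example) and VM_DMA32_BOUNDARY (4GB), a segment
	 * [1MB, 5GB) is carved into [1MB, 16MB), [16MB, 4GB) and [4GB, 5GB),
	 * so that each resulting segment maps onto exactly one free list.
	 */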
	paddr = start;
#ifdef VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;
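
	/*
	 * Worked example (illustrative, not part of the original source):
	 * suppose only the LOWMEM and DEFAULT lists were marked above, with
	 * VM_FREELIST_DEFAULT == 0 and, say, VM_FREELIST_LOWMEM == 1.  The
	 * 0/1 marks { 1, 1, 0, ... } become the running totals
	 * { 1, 2, 2, ... }, vm_nfreelists becomes 2, and the decrement
	 * yields { 0, 1, 1, ... }, i.e. flind 0 for DEFAULT and flind 1 for
	 * LOWMEM.
	 */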

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	vm_cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->busy_lock = VPB_UNBUSIED;
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}
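
/*
 * Worked example for vm_phys_split_pages() above (illustrative, not part of
 * the original source): splitting an order-3 block m[0..7] down to order 0
 * returns the upper halves to successively smaller queues:
 *
 *	oind 3 -> 2: m[4..7] is added to fl[2]
 *	oind 2 -> 1: m[2..3] is added to fl[1]
 *	oind 1 -> 0: m[1]    is added to fl[0]
 *
 * leaving m[0] as the order-0 page handed back to the caller.
 */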

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int domain, flind;
	struct vm_domain_iterator vi;

	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			m = vm_phys_alloc_domain_pages(domain, flind, pool,
			    order);
			if (m != NULL)
				return (m);
		}
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
{
	vm_page_t m;
	struct vm_domain_iterator vi;
	int domain;

	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		m = vm_phys_alloc_domain_pages(domain,
		    vm_freelist_to_flind[freelist], pool, order);
		if (m != NULL)
			return (m);
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
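
/*
 * Usage sketch (illustrative, not part of the original source): a caller in
 * the VM system that wants one page from the default pool does roughly
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * If the requested pool has no block of sufficient order,
 * vm_phys_alloc_domain_pages() steals the largest available block from
 * another pool on the same free list, relabels it with vm_phys_set_pool(),
 * and splits it down to the requested order.
 */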

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}
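
/*
 * Usage sketch (illustrative, not part of the original source): a driver
 * that needs struct vm_page objects for a memory-mapped device window can
 * register the physical range once and translate addresses later:
 *
 *	error = vm_phys_fictitious_reg_range(bar_pa, bar_pa + bar_len,
 *	    VM_MEMATTR_DEFAULT);
 *	...
 *	m = vm_phys_fictitious_to_vm_page(bar_pa + off);
 *	...
 *	vm_phys_fictitious_unreg_range(bar_pa, bar_pa + bar_len);
 *
 * "bar_pa", "bar_len" and "off" are hypothetical names; the range is
 * expected to be page aligned and must not overlap a registered range.
 */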

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
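
/*
 * Worked example for vm_phys_free_contig() above (illustrative, not part of
 * the original source): freeing npages = 6 starting at page frame 6 runs as
 * follows.  Frame 6 is aligned to 2 pages but not 4, so the first loop frees
 * an order-1 block (frames 6-7) and advances to frame 8 with npages = 4;
 * frame 8's alignment allows order 3, but 8 pages exceed the remaining 4, so
 * the first loop stops and the residual loop frees frames 8-11 as a single
 * order-2 block.  Each freed block may then coalesce with its buddy inside
 * vm_phys_free_pages().
 */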

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
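
	/*
	 * Worked example (illustrative, not part of the original source):
	 * if "m_set" is a free order-2 block covering page frames 8-11 and
	 * "m" is frame 10, the loop below first returns frames 8-9 (order 1)
	 * to the free lists and keeps 10-11, then returns frame 11 (order 0),
	 * leaving "m_set" equal to "m" at frame 10.
	 */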
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_domain_iterator vi;
	struct vm_phys_seg *seg;
	int domain, segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if (low >= high)
		return (NULL);
	vm_policy_iterator_init(&vi);
restartdom:
	if (vm_domain_iterator_run(&vi, &domain) != 0) {
		vm_policy_iterator_finish(&vi);
		return (NULL);
	}
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
		goto restartdom;
	vm_policy_iterator_finish(&vi);
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Search for a run satisfying the specified conditions. */
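	/*
	 * Note on the boundary test below (illustrative, not part of the
	 * original source): for a candidate run [pa, pa_end), the expression
	 * rounddown2(pa ^ (pa_end - 1), boundary) is zero exactly when the
	 * first and last byte of the run agree in every bit at or above
	 * log2(boundary), i.e. the run does not cross a boundary-aligned
	 * address.  For example, with a 64KB boundary, [0x1f000, 0x21000)
	 * fails because it crosses 0x20000, while [0x10000, 0x12000) passes.
	 * A boundary of 0 makes the test trivially true.
	 */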
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf(" | POOL %d", pind);
			db_printf("\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- -- ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf(" %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf(" | %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif