/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <vm/vm_domain.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef VM_NUMA_ALLOC
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t start;
	vm_paddr_t end;
	vm_page_t first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define VM_ISADMA_BOUNDARY 16777216
#endif
#ifdef VM_FREELIST_DMA32
#define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef VM_NUMA_ALLOC
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

/*
 * Default to first-touch + round-robin.
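 *
 * Under first-touch-rr an allocation is first attempted from the domain of
 * the allocating thread and falls back to round-robin across the remaining
 * domains.  The default can be changed at runtime through the
 * vm.default_policy sysctl defined below, e.g. (illustrative usage, not part
 * of the original source):
 *
 *	# sysctl vm.default_policy=first-touch
 *
 * Accepted values are "rr", "first-touch", and "first-touch-rr".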
 */
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
    MTX_DEF);
#ifdef VM_NUMA_ALLOC
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation */
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
	char policy_name[32];
	int error;

	mtx_lock(&vm_default_policy_mtx);

	/* Map policy to output string */
	switch (vm_default_policy.p.policy) {
	case VM_POLICY_FIRST_TOUCH:
		strcpy(policy_name, "first-touch");
		break;
	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
		strcpy(policy_name, "first-touch-rr");
		break;
	case VM_POLICY_ROUND_ROBIN:
	default:
		strcpy(policy_name, "rr");
		break;
	}
	mtx_unlock(&vm_default_policy_mtx);

	error = sysctl_handle_string(oidp, &policy_name[0],
	    sizeof(policy_name), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vm_default_policy_mtx);
	/* Set: match on the subset of policies that make sense as a default */
	if (strcmp("first-touch-rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
	} else if (strcmp("first-touch", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH, 0);
	} else if (strcmp("rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_ROUND_ROBIN, 0);
	} else {
		error = EINVAL;
		goto finish;
	}

	error = 0;
finish:
	mtx_unlock(&vm_default_policy_mtx);
	return (error);
}

SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");

/*
 * Red-black tree helpers for vm fictitious range management.
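 *
 * The comparison routine below serves two kinds of keys: a key whose "end"
 * is zero denotes a single-address lookup (as built by
 * vm_phys_fictitious_to_vm_page()), while a key with a non-zero "end"
 * denotes a whole range being inserted, in which case any overlap with an
 * already registered range is a fatal error.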
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

static __inline int
vm_rr_selectdomain(void)
{
#ifdef VM_NUMA_ALLOC
	struct thread *td;

	td = curthread;

	td->td_dom_rr_idx++;
	td->td_dom_rr_idx %= vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}

/*
 * Initialise a VM domain iterator.
 *
 * Check the thread policy, then the proc policy,
 * then default to the system policy.
 *
 * Later on the various layers will have this logic
 * plumbed into them and the phys code will be explicitly
 * handed a VM domain policy to use.
 */
static void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#ifdef VM_NUMA_ALLOC
	struct vm_domain_policy lcl;
#endif

	vm_domain_iterator_init(vi);

#ifdef VM_NUMA_ALLOC
	/* Copy out the thread policy */
	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Thread policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}

	vm_domain_policy_localcopy(&lcl,
	    &curthread->td_proc->p_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Process policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}
#endif
	/* Use system default policy */
	vm_domain_iterator_set_policy(vi, &vm_default_policy);
}

static void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

	vm_domain_iterator_cleanup(vi);
}

boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
	struct vm_phys_seg *s;
	int idx;

	while ((idx = ffsl(mask)) != 0) {
		idx--;	/* ffsl counts from 1 */
		mask &= ~(1UL << idx);
		s = &vm_phys_segs[idx];
		if (low < s->end && high > s->start)
			return (TRUE);
	}
	return (FALSE);
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
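 *
 * The output of the resulting "vm.phys_free" sysctl resembles the following
 * (an illustrative sketch only; there is one column per pool, one row per
 * order, and the counts below are made up):
 *
 *	DOMAIN 0:
 *
 *	FREE LIST 0:
 *
 *	  ORDER (SIZE) | NUMBER
 *	               | POOL 0 | POOL 1
 *	  12 (16384K)  |      3 |      0
 *	  ...
 *	   0 (     4K) |    127 |     42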
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, " | POOL %d", pind);
			sbuf_printf(&sbuf, "\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- -- ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, " %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, " | %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef VM_NUMA_ALLOC
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef VM_NUMA_ALLOC
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef VM_NUMA_ALLOC
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
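	 *
	 * For example (illustrative), on a configuration that defines only
	 * VM_FREELIST_DMA32 with VM_DMA32_BOUNDARY at 4 GB, a segment
	 * covering [3 GB, 6 GB) is recorded as two segments, [3 GB, 4 GB)
	 * and [4 GB, 6 GB), so that each segment lies entirely on one side
	 * of every free list boundary.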
	 */
	paddr = start;
#ifdef VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	vm_cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->busy_lock = VPB_UNBUSIED;
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
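 *
 * Illustrative usage (not part of the original source), requesting an
 * order-2 block, i.e. four contiguous pages, from the default pool; most
 * callers reach this through vm_page_alloc(), which also maintains the
 * global free page counts:
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 2);
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * If no order-2 block is free, a larger block is split by
 * vm_phys_split_pages() and the unused buddies are returned to the free
 * lists.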
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int domain, flind;
	struct vm_domain_iterator vi;

	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			m = vm_phys_alloc_domain_pages(domain, flind, pool,
			    order);
			if (m != NULL)
				return (m);
		}
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
{
	vm_page_t m;
	struct vm_domain_iterator vi;
	int domain;

	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		m = vm_phys_alloc_domain_pages(domain,
		    vm_freelist_to_flind[freelist], pool, order);
		if (m != NULL)
			return (m);
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
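 *
 * Freeing attempts to coalesce the block with its "buddy", found by toggling
 * the bit of the physical address that corresponds to the block size.  As an
 * illustrative example with 4 KB pages: freeing the order-0 page at physical
 * address 0x3000 checks its buddy at 0x2000 (0x3000 ^ 0x1000); if that page
 * is free at order 0, the two merge into an order-1 block at 0x2000, whose
 * buddy at 0x0000 is checked next, and so on up to VM_NFREEORDER - 1.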
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
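 *
 * Note that, unlike vm_phys_alloc_contig(), this function does not allocate
 * or remove anything from the free lists; it merely identifies a candidate
 * run of pages that a caller (for example, vm_page_reclaim_contig()) may
 * then attempt to reclaim or relocate.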
 */
vm_page_t
vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
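	 *
	 * As an illustrative example with 4 KB pages: if "m" is the page at
	 * physical address 0xa000 and "m_set" is a free order-2 block at
	 * 0x8000, the first iteration frees the order-1 half at 0x8000 and
	 * keeps [0xa000, 0xc000); the second frees the order-0 page at
	 * 0xb000, leaving exactly "m".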
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl;
	static int flind, oind, pind;
	vm_page_t m, m_tmp;
	int domain;

	domain = vm_rr_selectdomain();
	fl = vm_phys_free_queues[domain][0][0];
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					vm_phys_freecnt_adj(m, -1);
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					vm_phys_freecnt_adj(m, 1);
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[domain][flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
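 *
 * For example, an alignment of 4 MB requests a run whose first page is 4 MB
 * aligned, and a boundary of 1 GB additionally requires the entire run to
 * lie within a single 1 GB aligned window.  Illustrative call (not from the
 * original source) allocating 16 contiguous pages below 4 GB with no
 * restriction beyond page alignment:
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_contig(16, 0, (vm_paddr_t)1 << 32, PAGE_SIZE, 0);
 *	mtx_unlock(&vm_page_queue_free_mtx);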
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_domain_iterator vi;
	struct vm_phys_seg *seg;
	int domain, segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if (low >= high)
		return (NULL);
	vm_policy_iterator_init(&vi);
restartdom:
	if (vm_domain_iterator_run(&vi, &domain) != 0) {
		vm_policy_iterator_finish(&vi);
		return (NULL);
	}
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
		goto restartdom;
	vm_policy_iterator_finish(&vi);
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
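				 *
				 * The boundary test below relies on the fact
				 * that the first and last byte of the run lie
				 * in the same boundary-sized window exactly
				 * when (pa ^ (pa_end - 1)) has no bits set at
				 * or above log2(boundary), i.e. when
				 * rounddown2() of that value is zero.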
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n ORDER (SIZE) | NUMBER"
			    "\n ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf(" | POOL %d", pind);
			db_printf("\n-- ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- -- ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf(" %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf(" | %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif