111752d88SAlan Cox /*- 2fe267a55SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3fe267a55SPedro F. Giffuni * 411752d88SAlan Cox * Copyright (c) 2002-2006 Rice University 511752d88SAlan Cox * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 611752d88SAlan Cox * All rights reserved. 711752d88SAlan Cox * 811752d88SAlan Cox * This software was developed for the FreeBSD Project by Alan L. Cox, 911752d88SAlan Cox * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 1011752d88SAlan Cox * 1111752d88SAlan Cox * Redistribution and use in source and binary forms, with or without 1211752d88SAlan Cox * modification, are permitted provided that the following conditions 1311752d88SAlan Cox * are met: 1411752d88SAlan Cox * 1. Redistributions of source code must retain the above copyright 1511752d88SAlan Cox * notice, this list of conditions and the following disclaimer. 1611752d88SAlan Cox * 2. Redistributions in binary form must reproduce the above copyright 1711752d88SAlan Cox * notice, this list of conditions and the following disclaimer in the 1811752d88SAlan Cox * documentation and/or other materials provided with the distribution. 1911752d88SAlan Cox * 2011752d88SAlan Cox * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2111752d88SAlan Cox * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2211752d88SAlan Cox * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2311752d88SAlan Cox * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 2411752d88SAlan Cox * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2511752d88SAlan Cox * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 2611752d88SAlan Cox * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 2711752d88SAlan Cox * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 2811752d88SAlan Cox * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2911752d88SAlan Cox * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 3011752d88SAlan Cox * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111752d88SAlan Cox * POSSIBILITY OF SUCH DAMAGE. 3211752d88SAlan Cox */ 3311752d88SAlan Cox 34fbd80bd0SAlan Cox /* 35fbd80bd0SAlan Cox * Physical memory system implementation 36fbd80bd0SAlan Cox * 37fbd80bd0SAlan Cox * Any external functions defined by this module are only to be used by the 38fbd80bd0SAlan Cox * virtual memory system. 39fbd80bd0SAlan Cox */ 40fbd80bd0SAlan Cox 4111752d88SAlan Cox #include <sys/cdefs.h> 4211752d88SAlan Cox __FBSDID("$FreeBSD$"); 4311752d88SAlan Cox 4411752d88SAlan Cox #include "opt_ddb.h" 45174b5f38SJohn Baldwin #include "opt_vm.h" 4611752d88SAlan Cox 4711752d88SAlan Cox #include <sys/param.h> 4811752d88SAlan Cox #include <sys/systm.h> 4911752d88SAlan Cox #include <sys/lock.h> 5011752d88SAlan Cox #include <sys/kernel.h> 5111752d88SAlan Cox #include <sys/malloc.h> 5211752d88SAlan Cox #include <sys/mutex.h> 537e226537SAttilio Rao #include <sys/proc.h> 5411752d88SAlan Cox #include <sys/queue.h> 5538d6b2dcSRoger Pau Monné #include <sys/rwlock.h> 5611752d88SAlan Cox #include <sys/sbuf.h> 5711752d88SAlan Cox #include <sys/sysctl.h> 5838d6b2dcSRoger Pau Monné #include <sys/tree.h> 5911752d88SAlan Cox #include <sys/vmmeter.h> 606520495aSAdrian Chadd #include <sys/seq.h> 6111752d88SAlan Cox 6211752d88SAlan Cox #include <ddb/ddb.h> 6311752d88SAlan Cox 6411752d88SAlan Cox #include <vm/vm.h> 6511752d88SAlan Cox 
#include <vm/vm_param.h> 6611752d88SAlan Cox #include <vm/vm_kern.h> 6711752d88SAlan Cox #include <vm/vm_object.h> 6811752d88SAlan Cox #include <vm/vm_page.h> 6911752d88SAlan Cox #include <vm/vm_phys.h> 70e2068d0bSJeff Roberson #include <vm/vm_pagequeue.h> 7111752d88SAlan Cox 72449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, 73449c2e92SKonstantin Belousov "Too many physsegs."); 7411752d88SAlan Cox 75b6715dabSJeff Roberson #ifdef NUMA 76cdfeced8SJeff Roberson struct mem_affinity __read_mostly *mem_affinity; 77cdfeced8SJeff Roberson int __read_mostly *mem_locality; 7862d70a81SJohn Baldwin #endif 79a3870a18SJohn Baldwin 80cdfeced8SJeff Roberson int __read_mostly vm_ndomains = 1; 817e226537SAttilio Rao 82cdfeced8SJeff Roberson struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX]; 83cdfeced8SJeff Roberson int __read_mostly vm_phys_nsegs; 8411752d88SAlan Cox 8538d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg; 8638d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, 8738d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *); 8838d6b2dcSRoger Pau Monné 8938d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = 9038d6b2dcSRoger Pau Monné RB_INITIALIZER(_vm_phys_fictitious_tree); 9138d6b2dcSRoger Pau Monné 9238d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg { 9338d6b2dcSRoger Pau Monné RB_ENTRY(vm_phys_fictitious_seg) node; 9438d6b2dcSRoger Pau Monné /* Memory region data */ 95b6de32bdSKonstantin Belousov vm_paddr_t start; 96b6de32bdSKonstantin Belousov vm_paddr_t end; 97b6de32bdSKonstantin Belousov vm_page_t first_page; 9838d6b2dcSRoger Pau Monné }; 9938d6b2dcSRoger Pau Monné 10038d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, 10138d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp); 10238d6b2dcSRoger Pau Monné 103cdfeced8SJeff Roberson static struct rwlock_padalign vm_phys_fictitious_reg_lock; 104c0432fc3SMark 
Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); 105b6de32bdSKonstantin Belousov 106cdfeced8SJeff Roberson static struct vm_freelist __aligned(CACHE_LINE_SIZE) 1077e226537SAttilio Rao vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; 10811752d88SAlan Cox 109cdfeced8SJeff Roberson static int __read_mostly vm_nfreelists; 110d866a563SAlan Cox 111d866a563SAlan Cox /* 112d866a563SAlan Cox * Provides the mapping from VM_FREELIST_* to free list indices (flind). 113d866a563SAlan Cox */ 114cdfeced8SJeff Roberson static int __read_mostly vm_freelist_to_flind[VM_NFREELIST]; 115d866a563SAlan Cox 116d866a563SAlan Cox CTASSERT(VM_FREELIST_DEFAULT == 0); 117d866a563SAlan Cox 118d866a563SAlan Cox #ifdef VM_FREELIST_ISADMA 119d866a563SAlan Cox #define VM_ISADMA_BOUNDARY 16777216 120d866a563SAlan Cox #endif 121d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 122d866a563SAlan Cox #define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) 123d866a563SAlan Cox #endif 124d866a563SAlan Cox 125d866a563SAlan Cox /* 126d866a563SAlan Cox * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about 127d866a563SAlan Cox * the ordering of the free list boundaries. 
128d866a563SAlan Cox */ 129d866a563SAlan Cox #if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY) 130d866a563SAlan Cox CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY); 131d866a563SAlan Cox #endif 132d866a563SAlan Cox #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) 133d866a563SAlan Cox CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); 134d866a563SAlan Cox #endif 13511752d88SAlan Cox 13611752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 13711752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 13811752d88SAlan Cox NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 13911752d88SAlan Cox 14011752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 14111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 14211752d88SAlan Cox NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 14311752d88SAlan Cox 144b6715dabSJeff Roberson #ifdef NUMA 145415d7ccaSAdrian Chadd static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS); 146415d7ccaSAdrian Chadd SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD, 147415d7ccaSAdrian Chadd NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info"); 1486520495aSAdrian Chadd #endif 149415d7ccaSAdrian Chadd 1507e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, 1517e226537SAttilio Rao &vm_ndomains, 0, "Number of physical memory domains available."); 152a3870a18SJohn Baldwin 153c869e672SAlan Cox static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, 154c869e672SAlan Cox u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, 155c869e672SAlan Cox vm_paddr_t boundary); 156d866a563SAlan Cox static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); 157d866a563SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); 15811752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 15911752d88SAlan Cox int 
    order);

/*
 * Red-black tree helpers for vm fictitious range management.
 *
 * A "fictitious" segment describes a physical address range that is backed
 * by externally managed vm_page structures (see the fict_tree declared
 * above).  The tree is keyed by non-overlapping [start, end) ranges.
 */

/*
 * Locate a single page address relative to an existing range.  This is the
 * lookup half of the comparator: "p" carries only a start address (its end
 * is 0, see vm_phys_fictitious_cmp), and "range" is a real tree node.
 *
 * Returns 1 if the address lies at or beyond the range's end, -1 if it lies
 * before the range's start, and 0 if it falls inside the range.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

/*
 * RB-tree comparator for fictitious segments.  It serves two purposes,
 * distinguished by p1->end:
 *
 *  - p1->end == 0: p1 is a single-page search key; defer to
 *    vm_phys_fictitious_in_range() so the lookup lands inside the
 *    containing segment.
 *  - otherwise: p1 is a full range being inserted; order it strictly
 *    before or after p2.  Overlapping insertions are a caller bug and
 *    panic rather than silently corrupting the tree.
 */
static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

/*
 * Pick a NUMA domain that owns physical memory overlapping [low, high].
 *
 * "prefer" names a preferred domain, or -1 for no preference; it is
 * returned when it has qualifying memory.  Otherwise the lowest-numbered
 * qualifying domain is returned.  Panics if no domain's memory overlaps
 * the range at all.  On non-NUMA kernels (or before affinity info is
 * available) domain 0 is always returned.
 */
int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
23311752d88SAlan Cox */ 23411752d88SAlan Cox static int 23511752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 23611752d88SAlan Cox { 23711752d88SAlan Cox struct sbuf sbuf; 23811752d88SAlan Cox struct vm_freelist *fl; 2397e226537SAttilio Rao int dom, error, flind, oind, pind; 24011752d88SAlan Cox 24100f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 24200f0e671SMatthew D Fleming if (error != 0) 24300f0e671SMatthew D Fleming return (error); 2447e226537SAttilio Rao sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); 2457e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 246eb2f42fbSAlan Cox sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); 24711752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 248eb2f42fbSAlan Cox sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 24911752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 25011752d88SAlan Cox "\n ", flind); 25111752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 25211752d88SAlan Cox sbuf_printf(&sbuf, " | POOL %d", pind); 25311752d88SAlan Cox sbuf_printf(&sbuf, "\n-- "); 25411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 25511752d88SAlan Cox sbuf_printf(&sbuf, "-- -- "); 25611752d88SAlan Cox sbuf_printf(&sbuf, "--\n"); 25711752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 258d689bc00SAlan Cox sbuf_printf(&sbuf, " %2d (%6dK)", oind, 25911752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 26011752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 2617e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 262eb2f42fbSAlan Cox sbuf_printf(&sbuf, " | %6d", 2637e226537SAttilio Rao fl[oind].lcnt); 26411752d88SAlan Cox } 26511752d88SAlan Cox sbuf_printf(&sbuf, "\n"); 26611752d88SAlan Cox } 2677e226537SAttilio Rao } 26811752d88SAlan Cox } 2694e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 27011752d88SAlan Cox sbuf_delete(&sbuf); 27111752d88SAlan Cox return (error); 27211752d88SAlan Cox } 27311752d88SAlan Cox 27411752d88SAlan Cox /* 
27511752d88SAlan Cox * Outputs the set of physical memory segments. 27611752d88SAlan Cox */ 27711752d88SAlan Cox static int 27811752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 27911752d88SAlan Cox { 28011752d88SAlan Cox struct sbuf sbuf; 28111752d88SAlan Cox struct vm_phys_seg *seg; 28211752d88SAlan Cox int error, segind; 28311752d88SAlan Cox 28400f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 28500f0e671SMatthew D Fleming if (error != 0) 28600f0e671SMatthew D Fleming return (error); 2874e657159SMatthew D Fleming sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 28811752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 28911752d88SAlan Cox sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 29011752d88SAlan Cox seg = &vm_phys_segs[segind]; 29111752d88SAlan Cox sbuf_printf(&sbuf, "start: %#jx\n", 29211752d88SAlan Cox (uintmax_t)seg->start); 29311752d88SAlan Cox sbuf_printf(&sbuf, "end: %#jx\n", 29411752d88SAlan Cox (uintmax_t)seg->end); 295a3870a18SJohn Baldwin sbuf_printf(&sbuf, "domain: %d\n", seg->domain); 29611752d88SAlan Cox sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 29711752d88SAlan Cox } 2984e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 29911752d88SAlan Cox sbuf_delete(&sbuf); 30011752d88SAlan Cox return (error); 30111752d88SAlan Cox } 30211752d88SAlan Cox 303415d7ccaSAdrian Chadd /* 304415d7ccaSAdrian Chadd * Return affinity, or -1 if there's no affinity information. 
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
	/* No locality table means affinity is simply unknown. */
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	/* mem_locality is a flattened vm_ndomains x vm_ndomains matrix. */
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	/* One row per source domain, one column per destination domain. */
	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Insert page "m" into the order "order" queue of free list "fl", either at
 * the tail or the head depending on "tail".  NOTE(review): callers appear to
 * hold the per-domain free queue lock (see the assertions in the allocation
 * paths below) — confirm before adding new callers.
 */
static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

/*
 * Remove page "m" from the order "order" queue of free list "fl".  The
 * page's order is reset to VM_NFREEORDER to mark it as not free.
 */
static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 *
 * Appends a new entry to vm_phys_segs[] and then shifts it backwards so
 * that the array stays sorted by starting address.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	/* Insertion sort: slide larger-start segments up one slot. */
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

/*
 * Create one or more physical memory segments covering [start, end),
 * splitting the range wherever it crosses a NUMA affinity boundary so
 * that each resulting segment belongs to exactly one domain.
 */
static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			/* The remainder fits entirely in this domain. */
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		/* Emit the part inside this domain and keep going. */
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 *
 * Both endpoints must be page aligned.  The range is split at each
 * configured free list boundary (ISADMA, LOWMEM, DMA32) it spans so that
 * every created segment falls entirely within one free list.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	/* Walk segments from highest start to lowest (array is sorted). */
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			/* Everything else lands on the default free list. */
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * Repeatedly halves the order "oind" block starting at "m" down to "order",
 * returning the upper buddy of each split to free list "fl".  On return the
 * pages [m, m + (1 << order)) remain allocated to the caller.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		/* First pass: take blocks already in the requested pool. */
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = 1 << oind;
				need = imin(npages - i, avail);
				for (end = i + need; i < end;)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * The block was larger than needed;
					 * return the unused tail to the free
					 * lists.  i == npages here.
					 */
					vm_phys_free_contig(m, avail - need);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		/*
		 * Second pass: raid the other pools of this free list,
		 * largest blocks first, converting them to the requested
		 * pool as they are taken.
		 */
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					for (end = i + need; i < end;)
						ma[i++] = m++;
					if (need < avail) {
						vm_phys_free_contig(m, avail -
						    need);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	/* Partial (possibly zero) allocation: every free list was drained. */
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	/* Try each free list in preference order until one succeeds. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	/* Search the requested pool from the requested order upward. */
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* Return any excess buddies to the free list. */
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
76111752d88SAlan Cox */ 76211752d88SAlan Cox vm_page_t 76311752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa) 76411752d88SAlan Cox { 76511752d88SAlan Cox struct vm_phys_seg *seg; 76611752d88SAlan Cox int segind; 76711752d88SAlan Cox 76811752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 76911752d88SAlan Cox seg = &vm_phys_segs[segind]; 77011752d88SAlan Cox if (pa >= seg->start && pa < seg->end) 77111752d88SAlan Cox return (&seg->first_page[atop(pa - seg->start)]); 77211752d88SAlan Cox } 773f06a3a36SAndrew Thompson return (NULL); 77411752d88SAlan Cox } 77511752d88SAlan Cox 776b6de32bdSKonstantin Belousov vm_page_t 777b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa) 778b6de32bdSKonstantin Belousov { 77938d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg tmp, *seg; 780b6de32bdSKonstantin Belousov vm_page_t m; 781b6de32bdSKonstantin Belousov 782b6de32bdSKonstantin Belousov m = NULL; 78338d6b2dcSRoger Pau Monné tmp.start = pa; 78438d6b2dcSRoger Pau Monné tmp.end = 0; 78538d6b2dcSRoger Pau Monné 78638d6b2dcSRoger Pau Monné rw_rlock(&vm_phys_fictitious_reg_lock); 78738d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 78838d6b2dcSRoger Pau Monné rw_runlock(&vm_phys_fictitious_reg_lock); 78938d6b2dcSRoger Pau Monné if (seg == NULL) 79038d6b2dcSRoger Pau Monné return (NULL); 79138d6b2dcSRoger Pau Monné 792b6de32bdSKonstantin Belousov m = &seg->first_page[atop(pa - seg->start)]; 79338d6b2dcSRoger Pau Monné KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); 79438d6b2dcSRoger Pau Monné 795b6de32bdSKonstantin Belousov return (m); 796b6de32bdSKonstantin Belousov } 797b6de32bdSKonstantin Belousov 7985ebe728dSRoger Pau Monné static inline void 7995ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, 8005ebe728dSRoger Pau Monné long page_count, vm_memattr_t memattr) 8015ebe728dSRoger Pau Monné { 8025ebe728dSRoger Pau Monné long i; 8035ebe728dSRoger 
Pau Monné 804f93f7cf1SMark Johnston bzero(range, page_count * sizeof(*range)); 8055ebe728dSRoger Pau Monné for (i = 0; i < page_count; i++) { 8065ebe728dSRoger Pau Monné vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); 8075ebe728dSRoger Pau Monné range[i].oflags &= ~VPO_UNMANAGED; 8085ebe728dSRoger Pau Monné range[i].busy_lock = VPB_UNBUSIED; 8095ebe728dSRoger Pau Monné } 8105ebe728dSRoger Pau Monné } 8115ebe728dSRoger Pau Monné 812b6de32bdSKonstantin Belousov int 813b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, 814b6de32bdSKonstantin Belousov vm_memattr_t memattr) 815b6de32bdSKonstantin Belousov { 816b6de32bdSKonstantin Belousov struct vm_phys_fictitious_seg *seg; 817b6de32bdSKonstantin Belousov vm_page_t fp; 8185ebe728dSRoger Pau Monné long page_count; 819b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 8205ebe728dSRoger Pau Monné long pi, pe; 8215ebe728dSRoger Pau Monné long dpage_count; 822b6de32bdSKonstantin Belousov #endif 823b6de32bdSKonstantin Belousov 8245ebe728dSRoger Pau Monné KASSERT(start < end, 8255ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 8265ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 8275ebe728dSRoger Pau Monné 828b6de32bdSKonstantin Belousov page_count = (end - start) / PAGE_SIZE; 829b6de32bdSKonstantin Belousov 830b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 831b6de32bdSKonstantin Belousov pi = atop(start); 8325ebe728dSRoger Pau Monné pe = atop(end); 8335ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 834b6de32bdSKonstantin Belousov fp = &vm_page_array[pi - first_page]; 8355ebe728dSRoger Pau Monné if ((pe - first_page) > vm_page_array_size) { 8365ebe728dSRoger Pau Monné /* 8375ebe728dSRoger Pau Monné * We have a segment that starts inside 8385ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 
8395ebe728dSRoger Pau Monné * 8405ebe728dSRoger Pau Monné * Use vm_page_array pages for those that are 8415ebe728dSRoger Pau Monné * inside of the vm_page_array range, and 8425ebe728dSRoger Pau Monné * allocate the remaining ones. 8435ebe728dSRoger Pau Monné */ 8445ebe728dSRoger Pau Monné dpage_count = vm_page_array_size - (pi - first_page); 8455ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, dpage_count, 8465ebe728dSRoger Pau Monné memattr); 8475ebe728dSRoger Pau Monné page_count -= dpage_count; 8485ebe728dSRoger Pau Monné start += ptoa(dpage_count); 8495ebe728dSRoger Pau Monné goto alloc; 8505ebe728dSRoger Pau Monné } 8515ebe728dSRoger Pau Monné /* 8525ebe728dSRoger Pau Monné * We can allocate the full range from vm_page_array, 8535ebe728dSRoger Pau Monné * so there's no need to register the range in the tree. 8545ebe728dSRoger Pau Monné */ 8555ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 8565ebe728dSRoger Pau Monné return (0); 8575ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 8585ebe728dSRoger Pau Monné /* 8595ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 8605ebe728dSRoger Pau Monné * but starts outside of it. 8615ebe728dSRoger Pau Monné */ 8625ebe728dSRoger Pau Monné fp = &vm_page_array[0]; 8635ebe728dSRoger Pau Monné dpage_count = pe - first_page; 8645ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, 8655ebe728dSRoger Pau Monné memattr); 8665ebe728dSRoger Pau Monné end -= ptoa(dpage_count); 8675ebe728dSRoger Pau Monné page_count -= dpage_count; 8685ebe728dSRoger Pau Monné goto alloc; 8695ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 8705ebe728dSRoger Pau Monné /* 8715ebe728dSRoger Pau Monné * Trying to register a fictitious range that expands before 8725ebe728dSRoger Pau Monné * and after vm_page_array. 
8735ebe728dSRoger Pau Monné */ 8745ebe728dSRoger Pau Monné return (EINVAL); 8755ebe728dSRoger Pau Monné } else { 8765ebe728dSRoger Pau Monné alloc: 877b6de32bdSKonstantin Belousov #endif 878b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 879f93f7cf1SMark Johnston M_WAITOK); 8805ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE 881b6de32bdSKonstantin Belousov } 8825ebe728dSRoger Pau Monné #endif 8835ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 88438d6b2dcSRoger Pau Monné 88538d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 886b6de32bdSKonstantin Belousov seg->start = start; 887b6de32bdSKonstantin Belousov seg->end = end; 888b6de32bdSKonstantin Belousov seg->first_page = fp; 88938d6b2dcSRoger Pau Monné 89038d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 89138d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 89238d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 89338d6b2dcSRoger Pau Monné 894b6de32bdSKonstantin Belousov return (0); 895b6de32bdSKonstantin Belousov } 896b6de32bdSKonstantin Belousov 897b6de32bdSKonstantin Belousov void 898b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 899b6de32bdSKonstantin Belousov { 90038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp; 901b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 9025ebe728dSRoger Pau Monné long pi, pe; 903b6de32bdSKonstantin Belousov #endif 904b6de32bdSKonstantin Belousov 9055ebe728dSRoger Pau Monné KASSERT(start < end, 9065ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 9075ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 9085ebe728dSRoger Pau Monné 909b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 910b6de32bdSKonstantin Belousov pi = atop(start); 9115ebe728dSRoger Pau Monné pe = atop(end); 9125ebe728dSRoger Pau 
Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 9135ebe728dSRoger Pau Monné if ((pe - first_page) <= vm_page_array_size) { 9145ebe728dSRoger Pau Monné /* 9155ebe728dSRoger Pau Monné * This segment was allocated using vm_page_array 9165ebe728dSRoger Pau Monné * only, there's nothing to do since those pages 9175ebe728dSRoger Pau Monné * were never added to the tree. 9185ebe728dSRoger Pau Monné */ 9195ebe728dSRoger Pau Monné return; 9205ebe728dSRoger Pau Monné } 9215ebe728dSRoger Pau Monné /* 9225ebe728dSRoger Pau Monné * We have a segment that starts inside 9235ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 9245ebe728dSRoger Pau Monné * 9255ebe728dSRoger Pau Monné * Calculate how many pages were added to the 9265ebe728dSRoger Pau Monné * tree and free them. 9275ebe728dSRoger Pau Monné */ 9285ebe728dSRoger Pau Monné start = ptoa(first_page + vm_page_array_size); 9295ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 9305ebe728dSRoger Pau Monné /* 9315ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 9325ebe728dSRoger Pau Monné * but starts outside of it. 9335ebe728dSRoger Pau Monné */ 9345ebe728dSRoger Pau Monné end = ptoa(first_page); 9355ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 9365ebe728dSRoger Pau Monné /* Since it's not possible to register such a range, panic. 
*/ 9375ebe728dSRoger Pau Monné panic( 9385ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 9395ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 9405ebe728dSRoger Pau Monné } 941b6de32bdSKonstantin Belousov #endif 94238d6b2dcSRoger Pau Monné tmp.start = start; 94338d6b2dcSRoger Pau Monné tmp.end = 0; 944b6de32bdSKonstantin Belousov 94538d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 94638d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 94738d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 94838d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 94938d6b2dcSRoger Pau Monné panic( 95038d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 95138d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 95238d6b2dcSRoger Pau Monné } 95338d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 95438d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 95538d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 95638d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 957b6de32bdSKonstantin Belousov } 958b6de32bdSKonstantin Belousov 95911752d88SAlan Cox /* 96011752d88SAlan Cox * Free a contiguous, power of two-sized set of physical pages. 9618941dc44SAlan Cox * 9628941dc44SAlan Cox * The free page queues must be locked. 
96311752d88SAlan Cox */ 96411752d88SAlan Cox void 96511752d88SAlan Cox vm_phys_free_pages(vm_page_t m, int order) 96611752d88SAlan Cox { 96711752d88SAlan Cox struct vm_freelist *fl; 96811752d88SAlan Cox struct vm_phys_seg *seg; 9695c1f2cc4SAlan Cox vm_paddr_t pa; 97011752d88SAlan Cox vm_page_t m_buddy; 97111752d88SAlan Cox 97211752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER, 9738941dc44SAlan Cox ("vm_phys_free_pages: page %p has unexpected order %d", 97411752d88SAlan Cox m, m->order)); 97511752d88SAlan Cox KASSERT(m->pool < VM_NFREEPOOL, 9768941dc44SAlan Cox ("vm_phys_free_pages: page %p has unexpected pool %d", 97711752d88SAlan Cox m, m->pool)); 97811752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 9798941dc44SAlan Cox ("vm_phys_free_pages: order %d is out of range", order)); 98011752d88SAlan Cox seg = &vm_phys_segs[m->segind]; 981e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 9825c1f2cc4SAlan Cox if (order < VM_NFREEORDER - 1) { 9835c1f2cc4SAlan Cox pa = VM_PAGE_TO_PHYS(m); 9845c1f2cc4SAlan Cox do { 9855c1f2cc4SAlan Cox pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); 9865c1f2cc4SAlan Cox if (pa < seg->start || pa >= seg->end) 98711752d88SAlan Cox break; 9885c1f2cc4SAlan Cox m_buddy = &seg->first_page[atop(pa - seg->start)]; 98911752d88SAlan Cox if (m_buddy->order != order) 99011752d88SAlan Cox break; 99111752d88SAlan Cox fl = (*seg->free_queues)[m_buddy->pool]; 9927e226537SAttilio Rao vm_freelist_rem(fl, m_buddy, order); 99311752d88SAlan Cox if (m_buddy->pool != m->pool) 99411752d88SAlan Cox vm_phys_set_pool(m->pool, m_buddy, order); 99511752d88SAlan Cox order++; 9965c1f2cc4SAlan Cox pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); 99711752d88SAlan Cox m = &seg->first_page[atop(pa - seg->start)]; 9985c1f2cc4SAlan Cox } while (order < VM_NFREEORDER - 1); 99911752d88SAlan Cox } 100011752d88SAlan Cox fl = (*seg->free_queues)[m->pool]; 10017e226537SAttilio Rao vm_freelist_add(fl, m, order, 1); 100211752d88SAlan Cox } 
100311752d88SAlan Cox 100411752d88SAlan Cox /* 10055c1f2cc4SAlan Cox * Free a contiguous, arbitrarily sized set of physical pages. 10065c1f2cc4SAlan Cox * 10075c1f2cc4SAlan Cox * The free page queues must be locked. 10085c1f2cc4SAlan Cox */ 10095c1f2cc4SAlan Cox void 10105c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages) 10115c1f2cc4SAlan Cox { 10125c1f2cc4SAlan Cox u_int n; 10135c1f2cc4SAlan Cox int order; 10145c1f2cc4SAlan Cox 10155c1f2cc4SAlan Cox /* 10165c1f2cc4SAlan Cox * Avoid unnecessary coalescing by freeing the pages in the largest 10175c1f2cc4SAlan Cox * possible power-of-two-sized subsets. 10185c1f2cc4SAlan Cox */ 1019e2068d0bSJeff Roberson vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 10205c1f2cc4SAlan Cox for (;; npages -= n) { 10215c1f2cc4SAlan Cox /* 10225c1f2cc4SAlan Cox * Unsigned "min" is used here so that "order" is assigned 10235c1f2cc4SAlan Cox * "VM_NFREEORDER - 1" when "m"'s physical address is zero 10245c1f2cc4SAlan Cox * or the low-order bits of its physical address are zero 10255c1f2cc4SAlan Cox * because the size of a physical address exceeds the size of 10265c1f2cc4SAlan Cox * a long. 10275c1f2cc4SAlan Cox */ 10285c1f2cc4SAlan Cox order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, 10295c1f2cc4SAlan Cox VM_NFREEORDER - 1); 10305c1f2cc4SAlan Cox n = 1 << order; 10315c1f2cc4SAlan Cox if (npages < n) 10325c1f2cc4SAlan Cox break; 10335c1f2cc4SAlan Cox vm_phys_free_pages(m, order); 10345c1f2cc4SAlan Cox m += n; 10355c1f2cc4SAlan Cox } 10365c1f2cc4SAlan Cox /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". 
*/ 10375c1f2cc4SAlan Cox for (; npages > 0; npages -= n) { 10385c1f2cc4SAlan Cox order = flsl(npages) - 1; 10395c1f2cc4SAlan Cox n = 1 << order; 10405c1f2cc4SAlan Cox vm_phys_free_pages(m, order); 10415c1f2cc4SAlan Cox m += n; 10425c1f2cc4SAlan Cox } 10435c1f2cc4SAlan Cox } 10445c1f2cc4SAlan Cox 10455c1f2cc4SAlan Cox /* 1046c869e672SAlan Cox * Scan physical memory between the specified addresses "low" and "high" for a 1047c869e672SAlan Cox * run of contiguous physical pages that satisfy the specified conditions, and 1048c869e672SAlan Cox * return the lowest page in the run. The specified "alignment" determines 1049c869e672SAlan Cox * the alignment of the lowest physical page in the run. If the specified 1050c869e672SAlan Cox * "boundary" is non-zero, then the run of physical pages cannot span a 1051c869e672SAlan Cox * physical address that is a multiple of "boundary". 1052c869e672SAlan Cox * 1053c869e672SAlan Cox * "npages" must be greater than zero. Both "alignment" and "boundary" must 1054c869e672SAlan Cox * be a power of two. 
1055c869e672SAlan Cox */ 1056c869e672SAlan Cox vm_page_t 10573f289c3fSJeff Roberson vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 1058c869e672SAlan Cox u_long alignment, vm_paddr_t boundary, int options) 1059c869e672SAlan Cox { 1060c869e672SAlan Cox vm_paddr_t pa_end; 1061c869e672SAlan Cox vm_page_t m_end, m_run, m_start; 1062c869e672SAlan Cox struct vm_phys_seg *seg; 1063c869e672SAlan Cox int segind; 1064c869e672SAlan Cox 1065c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1066c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1067c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1068c869e672SAlan Cox if (low >= high) 1069c869e672SAlan Cox return (NULL); 1070c869e672SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 1071c869e672SAlan Cox seg = &vm_phys_segs[segind]; 10723f289c3fSJeff Roberson if (seg->domain != domain) 10733f289c3fSJeff Roberson continue; 1074c869e672SAlan Cox if (seg->start >= high) 1075c869e672SAlan Cox break; 1076c869e672SAlan Cox if (low >= seg->end) 1077c869e672SAlan Cox continue; 1078c869e672SAlan Cox if (low <= seg->start) 1079c869e672SAlan Cox m_start = seg->first_page; 1080c869e672SAlan Cox else 1081c869e672SAlan Cox m_start = &seg->first_page[atop(low - seg->start)]; 1082c869e672SAlan Cox if (high < seg->end) 1083c869e672SAlan Cox pa_end = high; 1084c869e672SAlan Cox else 1085c869e672SAlan Cox pa_end = seg->end; 1086c869e672SAlan Cox if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages)) 1087c869e672SAlan Cox continue; 1088c869e672SAlan Cox m_end = &seg->first_page[atop(pa_end - seg->start)]; 1089c869e672SAlan Cox m_run = vm_page_scan_contig(npages, m_start, m_end, 1090c869e672SAlan Cox alignment, boundary, options); 1091c869e672SAlan Cox if (m_run != NULL) 1092c869e672SAlan Cox return (m_run); 1093c869e672SAlan Cox } 1094c869e672SAlan Cox return (NULL); 1095c869e672SAlan Cox } 1096c869e672SAlan Cox 1097c869e672SAlan Cox /* 
109811752d88SAlan Cox * Set the pool for a contiguous, power of two-sized set of physical pages. 109911752d88SAlan Cox */ 11007bfda801SAlan Cox void 110111752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order) 110211752d88SAlan Cox { 110311752d88SAlan Cox vm_page_t m_tmp; 110411752d88SAlan Cox 110511752d88SAlan Cox for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 110611752d88SAlan Cox m_tmp->pool = pool; 110711752d88SAlan Cox } 110811752d88SAlan Cox 110911752d88SAlan Cox /* 11109742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 11119742373aSAlan Cox * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 11129742373aSAlan Cox * FALSE, indicating that "m" is not in the free lists. 11137bfda801SAlan Cox * 11147bfda801SAlan Cox * The free page queues must be locked. 11157bfda801SAlan Cox */ 1116e35395ceSAlan Cox boolean_t 11177bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m) 11187bfda801SAlan Cox { 11197bfda801SAlan Cox struct vm_freelist *fl; 11207bfda801SAlan Cox struct vm_phys_seg *seg; 11217bfda801SAlan Cox vm_paddr_t pa, pa_half; 11227bfda801SAlan Cox vm_page_t m_set, m_tmp; 11237bfda801SAlan Cox int order; 11247bfda801SAlan Cox 11257bfda801SAlan Cox /* 11267bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 11277bfda801SAlan Cox * physical pages containing the given physical page "m" and 11287bfda801SAlan Cox * assign it to "m_set". 
11297bfda801SAlan Cox */ 11307bfda801SAlan Cox seg = &vm_phys_segs[m->segind]; 1131e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 11327bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1133bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 11347bfda801SAlan Cox order++; 11357bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 11362fbced65SAlan Cox if (pa >= seg->start) 11377bfda801SAlan Cox m_set = &seg->first_page[atop(pa - seg->start)]; 1138e35395ceSAlan Cox else 1139e35395ceSAlan Cox return (FALSE); 11407bfda801SAlan Cox } 1141e35395ceSAlan Cox if (m_set->order < order) 1142e35395ceSAlan Cox return (FALSE); 1143e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 1144e35395ceSAlan Cox return (FALSE); 11457bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 11467bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 11477bfda801SAlan Cox m_set, m_set->order)); 11487bfda801SAlan Cox 11497bfda801SAlan Cox /* 11507bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 11517bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 11527bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 11537bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 
11547bfda801SAlan Cox */ 11557bfda801SAlan Cox fl = (*seg->free_queues)[m_set->pool]; 11567bfda801SAlan Cox order = m_set->order; 11577e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 11587bfda801SAlan Cox while (order > 0) { 11597bfda801SAlan Cox order--; 11607bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 11617bfda801SAlan Cox if (m->phys_addr < pa_half) 11627bfda801SAlan Cox m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 11637bfda801SAlan Cox else { 11647bfda801SAlan Cox m_tmp = m_set; 11657bfda801SAlan Cox m_set = &seg->first_page[atop(pa_half - seg->start)]; 11667bfda801SAlan Cox } 11677e226537SAttilio Rao vm_freelist_add(fl, m_tmp, order, 0); 11687bfda801SAlan Cox } 11697bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 1170e35395ceSAlan Cox return (TRUE); 11717bfda801SAlan Cox } 11727bfda801SAlan Cox 11737bfda801SAlan Cox /* 11742f9f48d6SAlan Cox * Allocate a contiguous set of physical pages of the given size 11752f9f48d6SAlan Cox * "npages" from the free lists. All of the physical pages must be at 11762f9f48d6SAlan Cox * or above the given physical address "low" and below the given 11772f9f48d6SAlan Cox * physical address "high". The given value "alignment" determines the 11782f9f48d6SAlan Cox * alignment of the first physical page in the set. If the given value 11792f9f48d6SAlan Cox * "boundary" is non-zero, then the set of physical pages cannot cross 11802f9f48d6SAlan Cox * any physical address boundary that is a multiple of that value. Both 118111752d88SAlan Cox * "alignment" and "boundary" must be a power of two. 
118211752d88SAlan Cox */ 118311752d88SAlan Cox vm_page_t 1184ef435ae7SJeff Roberson vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 11855c1f2cc4SAlan Cox u_long alignment, vm_paddr_t boundary) 118611752d88SAlan Cox { 1187c869e672SAlan Cox vm_paddr_t pa_end, pa_start; 1188c869e672SAlan Cox vm_page_t m_run; 1189c869e672SAlan Cox struct vm_phys_seg *seg; 1190ef435ae7SJeff Roberson int segind; 119111752d88SAlan Cox 1192c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1193c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1194c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1195e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(domain)); 1196c869e672SAlan Cox if (low >= high) 1197c869e672SAlan Cox return (NULL); 1198c869e672SAlan Cox m_run = NULL; 1199477bffbeSAlan Cox for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 1200c869e672SAlan Cox seg = &vm_phys_segs[segind]; 1201477bffbeSAlan Cox if (seg->start >= high || seg->domain != domain) 120211752d88SAlan Cox continue; 1203477bffbeSAlan Cox if (low >= seg->end) 1204477bffbeSAlan Cox break; 1205c869e672SAlan Cox if (low <= seg->start) 1206c869e672SAlan Cox pa_start = seg->start; 1207c869e672SAlan Cox else 1208c869e672SAlan Cox pa_start = low; 1209c869e672SAlan Cox if (high < seg->end) 1210c869e672SAlan Cox pa_end = high; 1211c869e672SAlan Cox else 1212c869e672SAlan Cox pa_end = seg->end; 1213c869e672SAlan Cox if (pa_end - pa_start < ptoa(npages)) 1214c869e672SAlan Cox continue; 1215c869e672SAlan Cox m_run = vm_phys_alloc_seg_contig(seg, npages, low, high, 1216c869e672SAlan Cox alignment, boundary); 1217c869e672SAlan Cox if (m_run != NULL) 1218c869e672SAlan Cox break; 1219c869e672SAlan Cox } 1220c869e672SAlan Cox return (m_run); 1221c869e672SAlan Cox } 122211752d88SAlan Cox 122311752d88SAlan Cox /* 1224c869e672SAlan Cox * Allocate a run of contiguous physical pages from the free list for the 
1225c869e672SAlan Cox * specified segment. 1226c869e672SAlan Cox */ 1227c869e672SAlan Cox static vm_page_t 1228c869e672SAlan Cox vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, 1229c869e672SAlan Cox vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 1230c869e672SAlan Cox { 1231c869e672SAlan Cox struct vm_freelist *fl; 1232c869e672SAlan Cox vm_paddr_t pa, pa_end, size; 1233c869e672SAlan Cox vm_page_t m, m_ret; 1234c869e672SAlan Cox u_long npages_end; 1235c869e672SAlan Cox int oind, order, pind; 1236c869e672SAlan Cox 1237c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1238c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1239c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1240e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1241c869e672SAlan Cox /* Compute the queue that is the best fit for npages. */ 1242c869e672SAlan Cox for (order = 0; (1 << order) < npages; order++); 1243c869e672SAlan Cox /* Search for a run satisfying the specified conditions. */ 1244c869e672SAlan Cox size = npages << PAGE_SHIFT; 1245c869e672SAlan Cox for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; 1246c869e672SAlan Cox oind++) { 1247c869e672SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 1248c869e672SAlan Cox fl = (*seg->free_queues)[pind]; 12495cd29d0fSMark Johnston TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { 1250c869e672SAlan Cox /* 125111752d88SAlan Cox * Is the size of this allocation request 125211752d88SAlan Cox * larger than the largest block size? 125311752d88SAlan Cox */ 125411752d88SAlan Cox if (order >= VM_NFREEORDER) { 125511752d88SAlan Cox /* 1256c869e672SAlan Cox * Determine if a sufficient number of 1257c869e672SAlan Cox * subsequent blocks to satisfy the 1258c869e672SAlan Cox * allocation request are free. 
125911752d88SAlan Cox */ 126011752d88SAlan Cox pa = VM_PAGE_TO_PHYS(m_ret); 1261c869e672SAlan Cox pa_end = pa + size; 126279e9552eSKonstantin Belousov if (pa_end < pa) 126379e9552eSKonstantin Belousov continue; 126411752d88SAlan Cox for (;;) { 1265c869e672SAlan Cox pa += 1 << (PAGE_SHIFT + 1266c869e672SAlan Cox VM_NFREEORDER - 1); 1267c869e672SAlan Cox if (pa >= pa_end || 1268c869e672SAlan Cox pa < seg->start || 126911752d88SAlan Cox pa >= seg->end) 127011752d88SAlan Cox break; 1271c869e672SAlan Cox m = &seg->first_page[atop(pa - 1272c869e672SAlan Cox seg->start)]; 1273c869e672SAlan Cox if (m->order != VM_NFREEORDER - 1274c869e672SAlan Cox 1) 127511752d88SAlan Cox break; 127611752d88SAlan Cox } 1277c869e672SAlan Cox /* If not, go to the next block. */ 1278c869e672SAlan Cox if (pa < pa_end) 127911752d88SAlan Cox continue; 128011752d88SAlan Cox } 128111752d88SAlan Cox 128211752d88SAlan Cox /* 1283c869e672SAlan Cox * Determine if the blocks are within the 1284c869e672SAlan Cox * given range, satisfy the given alignment, 1285c869e672SAlan Cox * and do not cross the given boundary. 128611752d88SAlan Cox */ 128711752d88SAlan Cox pa = VM_PAGE_TO_PHYS(m_ret); 1288c869e672SAlan Cox pa_end = pa + size; 1289d9c9c81cSPedro F. Giffuni if (pa >= low && pa_end <= high && 1290d9c9c81cSPedro F. Giffuni (pa & (alignment - 1)) == 0 && 1291d9c9c81cSPedro F. 
Giffuni rounddown2(pa ^ (pa_end - 1), boundary) == 0) 129211752d88SAlan Cox goto done; 129311752d88SAlan Cox } 129411752d88SAlan Cox } 129511752d88SAlan Cox } 129611752d88SAlan Cox return (NULL); 129711752d88SAlan Cox done: 129811752d88SAlan Cox for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 129911752d88SAlan Cox fl = (*seg->free_queues)[m->pool]; 13007e226537SAttilio Rao vm_freelist_rem(fl, m, m->order); 130111752d88SAlan Cox } 130211752d88SAlan Cox if (m_ret->pool != VM_FREEPOOL_DEFAULT) 130311752d88SAlan Cox vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); 130411752d88SAlan Cox fl = (*seg->free_queues)[m_ret->pool]; 130511752d88SAlan Cox vm_phys_split_pages(m_ret, oind, fl, order); 13065c1f2cc4SAlan Cox /* Return excess pages to the free lists. */ 13075c1f2cc4SAlan Cox npages_end = roundup2(npages, 1 << imin(oind, order)); 13085c1f2cc4SAlan Cox if (npages < npages_end) 13095c1f2cc4SAlan Cox vm_phys_free_contig(&m_ret[npages], npages_end - npages); 131011752d88SAlan Cox return (m_ret); 131111752d88SAlan Cox } 131211752d88SAlan Cox 131311752d88SAlan Cox #ifdef DDB 131411752d88SAlan Cox /* 131511752d88SAlan Cox * Show the number of physical pages in each of the free lists. 
131611752d88SAlan Cox */ 131711752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages) 131811752d88SAlan Cox { 131911752d88SAlan Cox struct vm_freelist *fl; 13207e226537SAttilio Rao int flind, oind, pind, dom; 132111752d88SAlan Cox 13227e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 13237e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom); 132411752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 132511752d88SAlan Cox db_printf("FREE LIST %d:\n" 132611752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 132711752d88SAlan Cox "\n ", flind); 132811752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 132911752d88SAlan Cox db_printf(" | POOL %d", pind); 133011752d88SAlan Cox db_printf("\n-- "); 133111752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 133211752d88SAlan Cox db_printf("-- -- "); 133311752d88SAlan Cox db_printf("--\n"); 133411752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 133511752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind, 133611752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 133711752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 13387e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 133911752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt); 134011752d88SAlan Cox } 134111752d88SAlan Cox db_printf("\n"); 134211752d88SAlan Cox } 134311752d88SAlan Cox db_printf("\n"); 134411752d88SAlan Cox } 13457e226537SAttilio Rao db_printf("\n"); 13467e226537SAttilio Rao } 134711752d88SAlan Cox } 134811752d88SAlan Cox #endif 1349