111752d88SAlan Cox /*- 2fe267a55SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3fe267a55SPedro F. Giffuni * 411752d88SAlan Cox * Copyright (c) 2002-2006 Rice University 511752d88SAlan Cox * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 611752d88SAlan Cox * All rights reserved. 711752d88SAlan Cox * 811752d88SAlan Cox * This software was developed for the FreeBSD Project by Alan L. Cox, 911752d88SAlan Cox * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 1011752d88SAlan Cox * 1111752d88SAlan Cox * Redistribution and use in source and binary forms, with or without 1211752d88SAlan Cox * modification, are permitted provided that the following conditions 1311752d88SAlan Cox * are met: 1411752d88SAlan Cox * 1. Redistributions of source code must retain the above copyright 1511752d88SAlan Cox * notice, this list of conditions and the following disclaimer. 1611752d88SAlan Cox * 2. Redistributions in binary form must reproduce the above copyright 1711752d88SAlan Cox * notice, this list of conditions and the following disclaimer in the 1811752d88SAlan Cox * documentation and/or other materials provided with the distribution. 1911752d88SAlan Cox * 2011752d88SAlan Cox * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2111752d88SAlan Cox * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2211752d88SAlan Cox * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2311752d88SAlan Cox * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2411752d88SAlan Cox * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2511752d88SAlan Cox * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 2611752d88SAlan Cox * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 2711752d88SAlan Cox * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 2811752d88SAlan Cox * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2911752d88SAlan Cox * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 3011752d88SAlan Cox * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111752d88SAlan Cox * POSSIBILITY OF SUCH DAMAGE. 3211752d88SAlan Cox */ 3311752d88SAlan Cox 34fbd80bd0SAlan Cox /* 35fbd80bd0SAlan Cox * Physical memory system implementation 36fbd80bd0SAlan Cox * 37fbd80bd0SAlan Cox * Any external functions defined by this module are only to be used by the 38fbd80bd0SAlan Cox * virtual memory system. 
39fbd80bd0SAlan Cox */ 40fbd80bd0SAlan Cox 4111752d88SAlan Cox #include <sys/cdefs.h> 4211752d88SAlan Cox __FBSDID("$FreeBSD$"); 4311752d88SAlan Cox 4411752d88SAlan Cox #include "opt_ddb.h" 45174b5f38SJohn Baldwin #include "opt_vm.h" 4611752d88SAlan Cox 4711752d88SAlan Cox #include <sys/param.h> 4811752d88SAlan Cox #include <sys/systm.h> 49662e7fa8SMark Johnston #include <sys/domainset.h> 5011752d88SAlan Cox #include <sys/lock.h> 5111752d88SAlan Cox #include <sys/kernel.h> 5211752d88SAlan Cox #include <sys/malloc.h> 5311752d88SAlan Cox #include <sys/mutex.h> 547e226537SAttilio Rao #include <sys/proc.h> 5511752d88SAlan Cox #include <sys/queue.h> 5638d6b2dcSRoger Pau Monné #include <sys/rwlock.h> 5711752d88SAlan Cox #include <sys/sbuf.h> 5811752d88SAlan Cox #include <sys/sysctl.h> 5938d6b2dcSRoger Pau Monné #include <sys/tree.h> 6011752d88SAlan Cox #include <sys/vmmeter.h> 6111752d88SAlan Cox 6211752d88SAlan Cox #include <ddb/ddb.h> 6311752d88SAlan Cox 6411752d88SAlan Cox #include <vm/vm.h> 6511752d88SAlan Cox #include <vm/vm_param.h> 6611752d88SAlan Cox #include <vm/vm_kern.h> 6711752d88SAlan Cox #include <vm/vm_object.h> 6811752d88SAlan Cox #include <vm/vm_page.h> 6911752d88SAlan Cox #include <vm/vm_phys.h> 70e2068d0bSJeff Roberson #include <vm/vm_pagequeue.h> 7111752d88SAlan Cox 72449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, 73449c2e92SKonstantin Belousov "Too many physsegs."); 7411752d88SAlan Cox 75b6715dabSJeff Roberson #ifdef NUMA 76cdfeced8SJeff Roberson struct mem_affinity __read_mostly *mem_affinity; 77cdfeced8SJeff Roberson int __read_mostly *mem_locality; 7862d70a81SJohn Baldwin #endif 79a3870a18SJohn Baldwin 80cdfeced8SJeff Roberson int __read_mostly vm_ndomains = 1; 81463406acSMark Johnston domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1); 827e226537SAttilio Rao 83cdfeced8SJeff Roberson struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX]; 84cdfeced8SJeff Roberson int __read_mostly vm_phys_nsegs; 8511752d88SAlan Cox 8638d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg; 8738d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, 8838d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *); 8938d6b2dcSRoger Pau Monné 9038d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = 9138d6b2dcSRoger Pau Monné RB_INITIALIZER(_vm_phys_fictitious_tree); 9238d6b2dcSRoger Pau Monné 9338d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg { 9438d6b2dcSRoger Pau Monné RB_ENTRY(vm_phys_fictitious_seg) node; 9538d6b2dcSRoger Pau Monné /* Memory region data */ 96b6de32bdSKonstantin Belousov vm_paddr_t start; 97b6de32bdSKonstantin Belousov vm_paddr_t end; 98b6de32bdSKonstantin Belousov vm_page_t first_page; 9938d6b2dcSRoger Pau Monné }; 10038d6b2dcSRoger Pau Monné 10138d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, 10238d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp); 10338d6b2dcSRoger Pau Monné 104cdfeced8SJeff Roberson static struct rwlock_padalign vm_phys_fictitious_reg_lock; 105c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); 106b6de32bdSKonstantin Belousov 107cdfeced8SJeff Roberson static struct vm_freelist __aligned(CACHE_LINE_SIZE) 108f2a496d6SKonstantin Belousov vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL] 109f2a496d6SKonstantin Belousov [VM_NFREEORDER_MAX]; 11011752d88SAlan Cox 111cdfeced8SJeff Roberson static int __read_mostly vm_nfreelists; 112d866a563SAlan 
Cox 113d866a563SAlan Cox /* 11421943937SJeff Roberson * These "avail lists" are globals used to communicate boot-time physical 11521943937SJeff Roberson * memory layout to other parts of the kernel. Each physically contiguous 11621943937SJeff Roberson * region of memory is defined by a start address at an even index and an 11721943937SJeff Roberson * end address at the following odd index. Each list is terminated by a 11821943937SJeff Roberson * pair of zero entries. 11921943937SJeff Roberson * 12021943937SJeff Roberson * dump_avail tells the dump code what regions to include in a crash dump, and 12121943937SJeff Roberson * phys_avail is all of the remaining physical memory that is available for 12221943937SJeff Roberson * the vm system. 12321943937SJeff Roberson * 12421943937SJeff Roberson * Initially dump_avail and phys_avail are identical. Boot time memory 12521943937SJeff Roberson * allocations remove extents from phys_avail that may still be included 12621943937SJeff Roberson * in dumps. 12721943937SJeff Roberson */ 12821943937SJeff Roberson vm_paddr_t phys_avail[PHYS_AVAIL_COUNT]; 12921943937SJeff Roberson vm_paddr_t dump_avail[PHYS_AVAIL_COUNT]; 13021943937SJeff Roberson 13121943937SJeff Roberson /* 132d866a563SAlan Cox * Provides the mapping from VM_FREELIST_* to free list indices (flind). 133d866a563SAlan Cox */ 134cdfeced8SJeff Roberson static int __read_mostly vm_freelist_to_flind[VM_NFREELIST]; 135d866a563SAlan Cox 136d866a563SAlan Cox CTASSERT(VM_FREELIST_DEFAULT == 0); 137d866a563SAlan Cox 138d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 139d866a563SAlan Cox #define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) 140d866a563SAlan Cox #endif 141d866a563SAlan Cox 142d866a563SAlan Cox /* 143d866a563SAlan Cox * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about 144d866a563SAlan Cox * the ordering of the free list boundaries. 
145d866a563SAlan Cox */ 146d866a563SAlan Cox #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) 147d866a563SAlan Cox CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); 148d866a563SAlan Cox #endif 14911752d88SAlan Cox 15011752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 15111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 15211752d88SAlan Cox NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 15311752d88SAlan Cox 15411752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 15511752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 15611752d88SAlan Cox NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 15711752d88SAlan Cox 158b6715dabSJeff Roberson #ifdef NUMA 159415d7ccaSAdrian Chadd static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS); 160415d7ccaSAdrian Chadd SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD, 161415d7ccaSAdrian Chadd NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info"); 1626520495aSAdrian Chadd #endif 163415d7ccaSAdrian Chadd 1647e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, 1657e226537SAttilio Rao &vm_ndomains, 0, "Number of physical memory domains available."); 166a3870a18SJohn Baldwin 167c869e672SAlan Cox static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, 168c869e672SAlan Cox u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, 169c869e672SAlan Cox vm_paddr_t boundary); 170d866a563SAlan Cox static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); 171d866a563SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); 17211752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 173370a338aSAlan Cox int order, int tail); 17411752d88SAlan Cox 17538d6b2dcSRoger Pau Monné /* 17638d6b2dcSRoger Pau Monné * Red-black tree helpers for vm fictitious range management. 
17738d6b2dcSRoger Pau Monné */ 17838d6b2dcSRoger Pau Monné static inline int 17938d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, 18038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *range) 18138d6b2dcSRoger Pau Monné { 18238d6b2dcSRoger Pau Monné 18338d6b2dcSRoger Pau Monné KASSERT(range->start != 0 && range->end != 0, 18438d6b2dcSRoger Pau Monné ("Invalid range passed on search for vm_fictitious page")); 18538d6b2dcSRoger Pau Monné if (p->start >= range->end) 18638d6b2dcSRoger Pau Monné return (1); 18738d6b2dcSRoger Pau Monné if (p->start < range->start) 18838d6b2dcSRoger Pau Monné return (-1); 18938d6b2dcSRoger Pau Monné 19038d6b2dcSRoger Pau Monné return (0); 19138d6b2dcSRoger Pau Monné } 19238d6b2dcSRoger Pau Monné 19338d6b2dcSRoger Pau Monné static int 19438d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, 19538d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *p2) 19638d6b2dcSRoger Pau Monné { 19738d6b2dcSRoger Pau Monné 19838d6b2dcSRoger Pau Monné /* Check if this is a search for a page */ 19938d6b2dcSRoger Pau Monné if (p1->end == 0) 20038d6b2dcSRoger Pau Monné return (vm_phys_fictitious_in_range(p1, p2)); 20138d6b2dcSRoger Pau Monné 20238d6b2dcSRoger Pau Monné KASSERT(p2->end != 0, 20338d6b2dcSRoger Pau Monné ("Invalid range passed as second parameter to vm fictitious comparison")); 20438d6b2dcSRoger Pau Monné 20538d6b2dcSRoger Pau Monné /* Searching to add a new range */ 20638d6b2dcSRoger Pau Monné if (p1->end <= p2->start) 20738d6b2dcSRoger Pau Monné return (-1); 20838d6b2dcSRoger Pau Monné if (p1->start >= p2->end) 20938d6b2dcSRoger Pau Monné return (1); 21038d6b2dcSRoger Pau Monné 21138d6b2dcSRoger Pau Monné panic("Trying to add overlapping vm fictitious ranges:\n" 21238d6b2dcSRoger Pau Monné "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start, 21338d6b2dcSRoger Pau Monné (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); 21438d6b2dcSRoger Pau Monné } 21538d6b2dcSRoger Pau Monné 2166f4acaf4SJeff Roberson int 2176f4acaf4SJeff Roberson vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high) 218449c2e92SKonstantin Belousov { 219b6715dabSJeff Roberson #ifdef NUMA 2206f4acaf4SJeff Roberson domainset_t mask; 2216f4acaf4SJeff Roberson int i; 222449c2e92SKonstantin Belousov 2236f4acaf4SJeff Roberson if (vm_ndomains == 1 || mem_affinity == NULL) 2246f4acaf4SJeff Roberson return (0); 2256f4acaf4SJeff Roberson 2266f4acaf4SJeff Roberson DOMAINSET_ZERO(&mask); 2276f4acaf4SJeff Roberson /* 2286f4acaf4SJeff Roberson * Check for any memory that overlaps low, high. 
2296f4acaf4SJeff Roberson */ 2306f4acaf4SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) 2316f4acaf4SJeff Roberson if (mem_affinity[i].start <= high && 2326f4acaf4SJeff Roberson mem_affinity[i].end >= low) 2336f4acaf4SJeff Roberson DOMAINSET_SET(mem_affinity[i].domain, &mask); 2346f4acaf4SJeff Roberson if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask)) 2356f4acaf4SJeff Roberson return (prefer); 2366f4acaf4SJeff Roberson if (DOMAINSET_EMPTY(&mask)) 2376f4acaf4SJeff Roberson panic("vm_phys_domain_match: Impossible constraint"); 2386f4acaf4SJeff Roberson return (DOMAINSET_FFS(&mask) - 1); 2396f4acaf4SJeff Roberson #else 2406f4acaf4SJeff Roberson return (0); 2416f4acaf4SJeff Roberson #endif 242449c2e92SKonstantin Belousov } 243449c2e92SKonstantin Belousov 24411752d88SAlan Cox /* 24511752d88SAlan Cox * Outputs the state of the physical memory allocator, specifically, 24611752d88SAlan Cox * the amount of physical memory in each free list. 24711752d88SAlan Cox */ 24811752d88SAlan Cox static int 24911752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 25011752d88SAlan Cox { 25111752d88SAlan Cox struct sbuf sbuf; 25211752d88SAlan Cox struct vm_freelist *fl; 2537e226537SAttilio Rao int dom, error, flind, oind, pind; 25411752d88SAlan Cox 25500f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 25600f0e671SMatthew D Fleming if (error != 0) 25700f0e671SMatthew D Fleming return (error); 2587e226537SAttilio Rao sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); 2597e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 260eb2f42fbSAlan Cox sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); 26111752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 262eb2f42fbSAlan Cox sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 26311752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 26411752d88SAlan Cox "\n ", flind); 26511752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 26611752d88SAlan Cox sbuf_printf(&sbuf, " | POOL %d", pind); 26711752d88SAlan Cox sbuf_printf(&sbuf, "\n-- "); 26811752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 26911752d88SAlan Cox sbuf_printf(&sbuf, "-- -- "); 27011752d88SAlan Cox sbuf_printf(&sbuf, "--\n"); 27111752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 272d689bc00SAlan Cox sbuf_printf(&sbuf, " %2d (%6dK)", oind, 27311752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 27411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 2757e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 276eb2f42fbSAlan Cox sbuf_printf(&sbuf, " | %6d", 2777e226537SAttilio Rao fl[oind].lcnt); 27811752d88SAlan Cox } 27911752d88SAlan Cox sbuf_printf(&sbuf, "\n"); 28011752d88SAlan Cox } 2817e226537SAttilio Rao } 28211752d88SAlan Cox } 2834e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 28411752d88SAlan Cox sbuf_delete(&sbuf); 28511752d88SAlan Cox return (error); 28611752d88SAlan Cox } 28711752d88SAlan Cox 28811752d88SAlan Cox /* 28911752d88SAlan Cox * Outputs the set of physical memory segments. 
29011752d88SAlan Cox */ 29111752d88SAlan Cox static int 29211752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 29311752d88SAlan Cox { 29411752d88SAlan Cox struct sbuf sbuf; 29511752d88SAlan Cox struct vm_phys_seg *seg; 29611752d88SAlan Cox int error, segind; 29711752d88SAlan Cox 29800f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 29900f0e671SMatthew D Fleming if (error != 0) 30000f0e671SMatthew D Fleming return (error); 3014e657159SMatthew D Fleming sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 30211752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 30311752d88SAlan Cox sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 30411752d88SAlan Cox seg = &vm_phys_segs[segind]; 30511752d88SAlan Cox sbuf_printf(&sbuf, "start: %#jx\n", 30611752d88SAlan Cox (uintmax_t)seg->start); 30711752d88SAlan Cox sbuf_printf(&sbuf, "end: %#jx\n", 30811752d88SAlan Cox (uintmax_t)seg->end); 309a3870a18SJohn Baldwin sbuf_printf(&sbuf, "domain: %d\n", seg->domain); 31011752d88SAlan Cox sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 31111752d88SAlan Cox } 3124e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 31311752d88SAlan Cox sbuf_delete(&sbuf); 31411752d88SAlan Cox return (error); 31511752d88SAlan Cox } 31611752d88SAlan Cox 317415d7ccaSAdrian Chadd /* 318415d7ccaSAdrian Chadd * Return affinity, or -1 if there's no affinity information. 319415d7ccaSAdrian Chadd */ 3206520495aSAdrian Chadd int 321415d7ccaSAdrian Chadd vm_phys_mem_affinity(int f, int t) 322415d7ccaSAdrian Chadd { 323415d7ccaSAdrian Chadd 324b6715dabSJeff Roberson #ifdef NUMA 325415d7ccaSAdrian Chadd if (mem_locality == NULL) 326415d7ccaSAdrian Chadd return (-1); 327415d7ccaSAdrian Chadd if (f >= vm_ndomains || t >= vm_ndomains) 328415d7ccaSAdrian Chadd return (-1); 329415d7ccaSAdrian Chadd return (mem_locality[f * vm_ndomains + t]); 3306520495aSAdrian Chadd #else 3316520495aSAdrian Chadd return (-1); 3326520495aSAdrian Chadd #endif 333415d7ccaSAdrian Chadd } 334415d7ccaSAdrian Chadd 335b6715dabSJeff Roberson #ifdef NUMA 336415d7ccaSAdrian Chadd /* 337415d7ccaSAdrian Chadd * Outputs the VM locality table. 
338415d7ccaSAdrian Chadd */ 339415d7ccaSAdrian Chadd static int 340415d7ccaSAdrian Chadd sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS) 341415d7ccaSAdrian Chadd { 342415d7ccaSAdrian Chadd struct sbuf sbuf; 343415d7ccaSAdrian Chadd int error, i, j; 344415d7ccaSAdrian Chadd 345415d7ccaSAdrian Chadd error = sysctl_wire_old_buffer(req, 0); 346415d7ccaSAdrian Chadd if (error != 0) 347415d7ccaSAdrian Chadd return (error); 348415d7ccaSAdrian Chadd sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 349415d7ccaSAdrian Chadd 350415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "\n"); 351415d7ccaSAdrian Chadd 352415d7ccaSAdrian Chadd for (i = 0; i < vm_ndomains; i++) { 353415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "%d: ", i); 354415d7ccaSAdrian Chadd for (j = 0; j < vm_ndomains; j++) { 355415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j)); 356415d7ccaSAdrian Chadd } 357415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "\n"); 358415d7ccaSAdrian Chadd } 359415d7ccaSAdrian Chadd error = sbuf_finish(&sbuf); 360415d7ccaSAdrian Chadd sbuf_delete(&sbuf); 361415d7ccaSAdrian Chadd return (error); 362415d7ccaSAdrian Chadd } 3636520495aSAdrian Chadd #endif 364415d7ccaSAdrian Chadd 3657e226537SAttilio Rao static void 3667e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) 367a3870a18SJohn Baldwin { 368a3870a18SJohn Baldwin 3697e226537SAttilio Rao m->order = order; 3707e226537SAttilio Rao if (tail) 3715cd29d0fSMark Johnston TAILQ_INSERT_TAIL(&fl[order].pl, m, listq); 3727e226537SAttilio Rao else 3735cd29d0fSMark Johnston TAILQ_INSERT_HEAD(&fl[order].pl, m, listq); 3747e226537SAttilio Rao fl[order].lcnt++; 375a3870a18SJohn Baldwin } 3767e226537SAttilio Rao 3777e226537SAttilio Rao static void 3787e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order) 3797e226537SAttilio Rao { 3807e226537SAttilio Rao 3815cd29d0fSMark Johnston TAILQ_REMOVE(&fl[order].pl, m, listq); 3827e226537SAttilio Rao fl[order].lcnt--; 3837e226537SAttilio Rao m->order = VM_NFREEORDER; 384a3870a18SJohn Baldwin } 385a3870a18SJohn Baldwin 38611752d88SAlan Cox /* 38711752d88SAlan Cox * Create a physical memory segment. 
38811752d88SAlan Cox */ 38911752d88SAlan Cox static void 390d866a563SAlan Cox _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain) 39111752d88SAlan Cox { 39211752d88SAlan Cox struct vm_phys_seg *seg; 39311752d88SAlan Cox 39411752d88SAlan Cox KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 39511752d88SAlan Cox ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 396ef435ae7SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 3977e226537SAttilio Rao ("vm_phys_create_seg: invalid domain provided")); 39811752d88SAlan Cox seg = &vm_phys_segs[vm_phys_nsegs++]; 399271f0f12SAlan Cox while (seg > vm_phys_segs && (seg - 1)->start >= end) { 400271f0f12SAlan Cox *seg = *(seg - 1); 401271f0f12SAlan Cox seg--; 402271f0f12SAlan Cox } 40311752d88SAlan Cox seg->start = start; 40411752d88SAlan Cox seg->end = end; 405a3870a18SJohn Baldwin seg->domain = domain; 40611752d88SAlan Cox } 40711752d88SAlan Cox 408a3870a18SJohn Baldwin static void 409d866a563SAlan Cox vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end) 410a3870a18SJohn Baldwin { 411b6715dabSJeff Roberson #ifdef NUMA 412a3870a18SJohn Baldwin int i; 413a3870a18SJohn Baldwin 414a3870a18SJohn Baldwin if (mem_affinity == NULL) { 415d866a563SAlan Cox _vm_phys_create_seg(start, end, 0); 416a3870a18SJohn Baldwin return; 417a3870a18SJohn Baldwin } 418a3870a18SJohn Baldwin 419a3870a18SJohn Baldwin for (i = 0;; i++) { 420a3870a18SJohn Baldwin if (mem_affinity[i].end == 0) 421a3870a18SJohn Baldwin panic("Reached end of affinity info"); 422a3870a18SJohn Baldwin if (mem_affinity[i].end <= start) 423a3870a18SJohn Baldwin continue; 424a3870a18SJohn Baldwin if (mem_affinity[i].start > start) 425a3870a18SJohn Baldwin panic("No affinity info for start %jx", 426a3870a18SJohn Baldwin (uintmax_t)start); 427a3870a18SJohn Baldwin if (mem_affinity[i].end >= end) { 428d866a563SAlan Cox _vm_phys_create_seg(start, end, 429a3870a18SJohn Baldwin mem_affinity[i].domain); 430a3870a18SJohn Baldwin break; 431a3870a18SJohn Baldwin } 432d866a563SAlan Cox _vm_phys_create_seg(start, mem_affinity[i].end, 433a3870a18SJohn Baldwin mem_affinity[i].domain); 434a3870a18SJohn Baldwin start = mem_affinity[i].end; 435a3870a18SJohn Baldwin } 43662d70a81SJohn Baldwin #else 43762d70a81SJohn Baldwin _vm_phys_create_seg(start, end, 0); 43862d70a81SJohn Baldwin #endif 439a3870a18SJohn Baldwin } 440a3870a18SJohn Baldwin 44111752d88SAlan Cox /* 442271f0f12SAlan Cox * Add a physical memory segment. 443271f0f12SAlan Cox */ 444271f0f12SAlan Cox void 445271f0f12SAlan Cox vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) 446271f0f12SAlan Cox { 447d866a563SAlan Cox vm_paddr_t paddr; 448271f0f12SAlan Cox 449271f0f12SAlan Cox KASSERT((start & PAGE_MASK) == 0, 450271f0f12SAlan Cox ("vm_phys_define_seg: start is not page aligned")); 451271f0f12SAlan Cox KASSERT((end & PAGE_MASK) == 0, 452271f0f12SAlan Cox ("vm_phys_define_seg: end is not page aligned")); 453d866a563SAlan Cox 454d866a563SAlan Cox /* 455d866a563SAlan Cox * Split the physical memory segment if it spans two or more free 456d866a563SAlan Cox * list boundaries. 
457d866a563SAlan Cox */ 458d866a563SAlan Cox paddr = start; 459d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 460d866a563SAlan Cox if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { 461d866a563SAlan Cox vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); 462d866a563SAlan Cox paddr = VM_LOWMEM_BOUNDARY; 463d866a563SAlan Cox } 464271f0f12SAlan Cox #endif 465d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 466d866a563SAlan Cox if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) { 467d866a563SAlan Cox vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY); 468d866a563SAlan Cox paddr = VM_DMA32_BOUNDARY; 469d866a563SAlan Cox } 470d866a563SAlan Cox #endif 471d866a563SAlan Cox vm_phys_create_seg(paddr, end); 472271f0f12SAlan Cox } 473271f0f12SAlan Cox 474271f0f12SAlan Cox /* 47511752d88SAlan Cox * Initialize the physical memory allocator. 476d866a563SAlan Cox * 477d866a563SAlan Cox * Requires that vm_page_array is initialized! 47811752d88SAlan Cox */ 47911752d88SAlan Cox void 48011752d88SAlan Cox vm_phys_init(void) 48111752d88SAlan Cox { 48211752d88SAlan Cox struct vm_freelist *fl; 48372aebdd7SAlan Cox struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg; 484d866a563SAlan Cox u_long npages; 485d866a563SAlan Cox int dom, flind, freelist, oind, pind, segind; 48611752d88SAlan Cox 487d866a563SAlan Cox /* 488d866a563SAlan Cox * Compute the number of free lists, and generate the mapping from the 489d866a563SAlan Cox * manifest constants VM_FREELIST_* to the free list indices. 490d866a563SAlan Cox * 491d866a563SAlan Cox * Initially, the entries of vm_freelist_to_flind[] are set to either 492d866a563SAlan Cox * 0 or 1 to indicate which free lists should be created. 493d866a563SAlan Cox */ 494d866a563SAlan Cox npages = 0; 495d866a563SAlan Cox for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 496d866a563SAlan Cox seg = &vm_phys_segs[segind]; 497d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 498d866a563SAlan Cox if (seg->end <= VM_LOWMEM_BOUNDARY) 499d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; 500d866a563SAlan Cox else 501d866a563SAlan Cox #endif 502d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 503d866a563SAlan Cox if ( 504d866a563SAlan Cox #ifdef VM_DMA32_NPAGES_THRESHOLD 505d866a563SAlan Cox /* 506d866a563SAlan Cox * Create the DMA32 free list only if the amount of 507d866a563SAlan Cox * physical memory above physical address 4G exceeds the 508d866a563SAlan Cox * given threshold. 509d866a563SAlan Cox */ 510d866a563SAlan Cox npages > VM_DMA32_NPAGES_THRESHOLD && 511d866a563SAlan Cox #endif 512d866a563SAlan Cox seg->end <= VM_DMA32_BOUNDARY) 513d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_DMA32] = 1; 514d866a563SAlan Cox else 515d866a563SAlan Cox #endif 516d866a563SAlan Cox { 517d866a563SAlan Cox npages += atop(seg->end - seg->start); 518d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1; 519d866a563SAlan Cox } 520d866a563SAlan Cox } 521d866a563SAlan Cox /* Change each entry into a running total of the free lists. */ 522d866a563SAlan Cox for (freelist = 1; freelist < VM_NFREELIST; freelist++) { 523d866a563SAlan Cox vm_freelist_to_flind[freelist] += 524d866a563SAlan Cox vm_freelist_to_flind[freelist - 1]; 525d866a563SAlan Cox } 526d866a563SAlan Cox vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1]; 527d866a563SAlan Cox KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists")); 528d866a563SAlan Cox /* Change each entry into a free list index. 
*/ 529d866a563SAlan Cox for (freelist = 0; freelist < VM_NFREELIST; freelist++) 530d866a563SAlan Cox vm_freelist_to_flind[freelist]--; 531d866a563SAlan Cox 532d866a563SAlan Cox /* 533d866a563SAlan Cox * Initialize the first_page and free_queues fields of each physical 534d866a563SAlan Cox * memory segment. 535d866a563SAlan Cox */ 536271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE 537d866a563SAlan Cox npages = 0; 53811752d88SAlan Cox #endif 539271f0f12SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 540271f0f12SAlan Cox seg = &vm_phys_segs[segind]; 541271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE 542d866a563SAlan Cox seg->first_page = &vm_page_array[npages]; 543d866a563SAlan Cox npages += atop(seg->end - seg->start); 544271f0f12SAlan Cox #else 545271f0f12SAlan Cox seg->first_page = PHYS_TO_VM_PAGE(seg->start); 54611752d88SAlan Cox #endif 547d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 548d866a563SAlan Cox if (seg->end <= VM_LOWMEM_BOUNDARY) { 549d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; 550d866a563SAlan Cox KASSERT(flind >= 0, 551d866a563SAlan Cox ("vm_phys_init: LOWMEM flind < 0")); 552d866a563SAlan Cox } else 553d866a563SAlan Cox #endif 554d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 555d866a563SAlan Cox if (seg->end <= VM_DMA32_BOUNDARY) { 556d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_DMA32]; 557d866a563SAlan Cox KASSERT(flind >= 0, 558d866a563SAlan Cox ("vm_phys_init: DMA32 flind < 0")); 559d866a563SAlan Cox } else 560d866a563SAlan Cox #endif 561d866a563SAlan Cox { 562d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT]; 563d866a563SAlan Cox KASSERT(flind >= 0, 564d866a563SAlan Cox ("vm_phys_init: DEFAULT flind < 0")); 56511752d88SAlan Cox } 566d866a563SAlan Cox seg->free_queues = &vm_phys_free_queues[seg->domain][flind]; 567d866a563SAlan Cox } 568d866a563SAlan Cox 569d866a563SAlan Cox /* 57072aebdd7SAlan Cox * Coalesce physical memory segments that are contiguous and share the 57172aebdd7SAlan Cox * same per-domain free queues. 57272aebdd7SAlan Cox */ 57372aebdd7SAlan Cox prev_seg = vm_phys_segs; 57472aebdd7SAlan Cox seg = &vm_phys_segs[1]; 57572aebdd7SAlan Cox end_seg = &vm_phys_segs[vm_phys_nsegs]; 57672aebdd7SAlan Cox while (seg < end_seg) { 57772aebdd7SAlan Cox if (prev_seg->end == seg->start && 57872aebdd7SAlan Cox prev_seg->free_queues == seg->free_queues) { 57972aebdd7SAlan Cox prev_seg->end = seg->end; 58072aebdd7SAlan Cox KASSERT(prev_seg->domain == seg->domain, 58172aebdd7SAlan Cox ("vm_phys_init: free queues cannot span domains")); 58272aebdd7SAlan Cox vm_phys_nsegs--; 58372aebdd7SAlan Cox end_seg--; 58472aebdd7SAlan Cox for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++) 58572aebdd7SAlan Cox *tmp_seg = *(tmp_seg + 1); 58672aebdd7SAlan Cox } else { 58772aebdd7SAlan Cox prev_seg = seg; 58872aebdd7SAlan Cox seg++; 58972aebdd7SAlan Cox } 59072aebdd7SAlan Cox } 59172aebdd7SAlan Cox 59272aebdd7SAlan Cox /* 593d866a563SAlan Cox * Initialize the free queues. 
594d866a563SAlan Cox */ 5957e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 59611752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 59711752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 5987e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 59911752d88SAlan Cox for (oind = 0; oind < VM_NFREEORDER; oind++) 60011752d88SAlan Cox TAILQ_INIT(&fl[oind].pl); 60111752d88SAlan Cox } 60211752d88SAlan Cox } 603a3870a18SJohn Baldwin } 604d866a563SAlan Cox 60538d6b2dcSRoger Pau Monné rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); 60611752d88SAlan Cox } 60711752d88SAlan Cox 60811752d88SAlan Cox /* 609662e7fa8SMark Johnston * Register info about the NUMA topology of the system. 610662e7fa8SMark Johnston * 611662e7fa8SMark Johnston * Invoked by platform-dependent code prior to vm_phys_init(). 612662e7fa8SMark Johnston */ 613662e7fa8SMark Johnston void 614662e7fa8SMark Johnston vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, 615662e7fa8SMark Johnston int *locality) 616662e7fa8SMark Johnston { 617662e7fa8SMark Johnston #ifdef NUMA 618b61f3142SMark Johnston int d, i; 619662e7fa8SMark Johnston 620b61f3142SMark Johnston /* 621b61f3142SMark Johnston * For now the only override value that we support is 1, which 622b61f3142SMark Johnston * effectively disables NUMA-awareness in the allocators. 623b61f3142SMark Johnston */ 624b61f3142SMark Johnston d = 0; 625b61f3142SMark Johnston TUNABLE_INT_FETCH("vm.numa.disabled", &d); 626b61f3142SMark Johnston if (d) 627b61f3142SMark Johnston ndomains = 1; 628b61f3142SMark Johnston 629b61f3142SMark Johnston if (ndomains > 1) { 630662e7fa8SMark Johnston vm_ndomains = ndomains; 631662e7fa8SMark Johnston mem_affinity = affinity; 632662e7fa8SMark Johnston mem_locality = locality; 633b61f3142SMark Johnston } 634662e7fa8SMark Johnston 635662e7fa8SMark Johnston for (i = 0; i < vm_ndomains; i++) 636662e7fa8SMark Johnston DOMAINSET_SET(i, &all_domains); 637662e7fa8SMark Johnston #else 638662e7fa8SMark Johnston (void)ndomains; 639662e7fa8SMark Johnston (void)affinity; 640662e7fa8SMark Johnston (void)locality; 641662e7fa8SMark Johnston #endif 642662e7fa8SMark Johnston } 643662e7fa8SMark Johnston 644c1685086SJeff Roberson int 645c1685086SJeff Roberson _vm_phys_domain(vm_paddr_t pa) 646c1685086SJeff Roberson { 647c1685086SJeff Roberson #ifdef NUMA 648c1685086SJeff Roberson int i; 649c1685086SJeff Roberson 650c1685086SJeff Roberson if (vm_ndomains == 1 || mem_affinity == NULL) 651c1685086SJeff Roberson return (0); 652c1685086SJeff Roberson 653c1685086SJeff Roberson /* 654c1685086SJeff Roberson * Check for any memory that overlaps. 655c1685086SJeff Roberson */ 656c1685086SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) 657c1685086SJeff Roberson if (mem_affinity[i].start <= pa && 658c1685086SJeff Roberson mem_affinity[i].end >= pa) 659c1685086SJeff Roberson return (mem_affinity[i].domain); 660c1685086SJeff Roberson #endif 661c1685086SJeff Roberson return (0); 662c1685086SJeff Roberson } 663c1685086SJeff Roberson 664662e7fa8SMark Johnston /* 66511752d88SAlan Cox * Split a contiguous, power of two-sized set of physical pages. 666370a338aSAlan Cox * 667370a338aSAlan Cox * When this function is called by a page allocation function, the caller 668370a338aSAlan Cox * should request insertion at the head unless the order [order, oind) queues 669370a338aSAlan Cox * are known to be empty. 
The objective being to reduce the likelihood of 670370a338aSAlan Cox * long-term fragmentation by promoting contemporaneous allocation and 671370a338aSAlan Cox * (hopefully) deallocation. 67211752d88SAlan Cox */ 67311752d88SAlan Cox static __inline void 674370a338aSAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, 675370a338aSAlan Cox int tail) 67611752d88SAlan Cox { 67711752d88SAlan Cox vm_page_t m_buddy; 67811752d88SAlan Cox 67911752d88SAlan Cox while (oind > order) { 68011752d88SAlan Cox oind--; 68111752d88SAlan Cox m_buddy = &m[1 << oind]; 68211752d88SAlan Cox KASSERT(m_buddy->order == VM_NFREEORDER, 68311752d88SAlan Cox ("vm_phys_split_pages: page %p has unexpected order %d", 68411752d88SAlan Cox m_buddy, m_buddy->order)); 685370a338aSAlan Cox vm_freelist_add(fl, m_buddy, oind, tail); 68611752d88SAlan Cox } 68711752d88SAlan Cox } 68811752d88SAlan Cox 68911752d88SAlan Cox /* 6907493904eSAlan Cox * Add the physical pages [m, m + npages) at the end of a power-of-two aligned 6917493904eSAlan Cox * and sized set to the specified free list. 6927493904eSAlan Cox * 6937493904eSAlan Cox * When this function is called by a page allocation function, the caller 6947493904eSAlan Cox * should request insertion at the head unless the lower-order queues are 6957493904eSAlan Cox * known to be empty. The objective being to reduce the likelihood of long- 6967493904eSAlan Cox * term fragmentation by promoting contemporaneous allocation and (hopefully) 6977493904eSAlan Cox * deallocation. 6987493904eSAlan Cox * 6997493904eSAlan Cox * The physical page m's buddy must not be free. 7007493904eSAlan Cox */ 7017493904eSAlan Cox static void 7027493904eSAlan Cox vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) 7037493904eSAlan Cox { 7047493904eSAlan Cox u_int n; 7057493904eSAlan Cox int order; 7067493904eSAlan Cox 7077493904eSAlan Cox KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0")); 7087493904eSAlan Cox KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) & 7097493904eSAlan Cox ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0, 7107493904eSAlan Cox ("vm_phys_enq_range: page %p and npages %u are misaligned", 7117493904eSAlan Cox m, npages)); 7127493904eSAlan Cox do { 7137493904eSAlan Cox KASSERT(m->order == VM_NFREEORDER, 7147493904eSAlan Cox ("vm_phys_enq_range: page %p has unexpected order %d", 7157493904eSAlan Cox m, m->order)); 7167493904eSAlan Cox order = ffs(npages) - 1; 7177493904eSAlan Cox KASSERT(order < VM_NFREEORDER, 7187493904eSAlan Cox ("vm_phys_enq_range: order %d is out of range", order)); 7197493904eSAlan Cox vm_freelist_add(fl, m, order, tail); 7207493904eSAlan Cox n = 1 << order; 7217493904eSAlan Cox m += n; 7227493904eSAlan Cox npages -= n; 7237493904eSAlan Cox } while (npages > 0); 7247493904eSAlan Cox } 7257493904eSAlan Cox 7267493904eSAlan Cox /* 72789ea39a7SAlan Cox * Tries to allocate the specified number of pages from the specified pool 72889ea39a7SAlan Cox * within the specified domain. Returns the actual number of allocated pages 72989ea39a7SAlan Cox * and a pointer to each page through the array ma[]. 73089ea39a7SAlan Cox * 73132d81f21SAlan Cox * The returned pages may not be physically contiguous. However, in contrast 73232d81f21SAlan Cox * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0), 73332d81f21SAlan Cox * calling this function once to allocate the desired number of pages will 73432d81f21SAlan Cox * avoid wasted time in vm_phys_split_pages(). 
73589ea39a7SAlan Cox * 73689ea39a7SAlan Cox * The free page queues for the specified domain must be locked. 73789ea39a7SAlan Cox */ 73889ea39a7SAlan Cox int 73989ea39a7SAlan Cox vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) 74089ea39a7SAlan Cox { 74189ea39a7SAlan Cox struct vm_freelist *alt, *fl; 74289ea39a7SAlan Cox vm_page_t m; 74389ea39a7SAlan Cox int avail, end, flind, freelist, i, need, oind, pind; 74489ea39a7SAlan Cox 74589ea39a7SAlan Cox KASSERT(domain >= 0 && domain < vm_ndomains, 74689ea39a7SAlan Cox ("vm_phys_alloc_npages: domain %d is out of range", domain)); 74789ea39a7SAlan Cox KASSERT(pool < VM_NFREEPOOL, 74889ea39a7SAlan Cox ("vm_phys_alloc_npages: pool %d is out of range", pool)); 74989ea39a7SAlan Cox KASSERT(npages <= 1 << (VM_NFREEORDER - 1), 75089ea39a7SAlan Cox ("vm_phys_alloc_npages: npages %d is out of range", npages)); 75189ea39a7SAlan Cox vm_domain_free_assert_locked(VM_DOMAIN(domain)); 75289ea39a7SAlan Cox i = 0; 75389ea39a7SAlan Cox for (freelist = 0; freelist < VM_NFREELIST; freelist++) { 75489ea39a7SAlan Cox flind = vm_freelist_to_flind[freelist]; 75589ea39a7SAlan Cox if (flind < 0) 75689ea39a7SAlan Cox continue; 75789ea39a7SAlan Cox fl = vm_phys_free_queues[domain][flind][pool]; 75889ea39a7SAlan Cox for (oind = 0; oind < VM_NFREEORDER; oind++) { 75989ea39a7SAlan Cox while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) { 76089ea39a7SAlan Cox vm_freelist_rem(fl, m, oind); 76189ea39a7SAlan Cox avail = 1 << oind; 76289ea39a7SAlan Cox need = imin(npages - i, avail); 76389ea39a7SAlan Cox for (end = i + need; i < end;) 76489ea39a7SAlan Cox ma[i++] = m++; 76589ea39a7SAlan Cox if (need < avail) { 7667493904eSAlan Cox /* 7677493904eSAlan Cox * Return excess pages to fl. Its 7687493904eSAlan Cox * order [0, oind) queues are empty. 7697493904eSAlan Cox */ 7707493904eSAlan Cox vm_phys_enq_range(m, avail - need, fl, 7717493904eSAlan Cox 1); 77289ea39a7SAlan Cox return (npages); 77389ea39a7SAlan Cox } else if (i == npages) 77489ea39a7SAlan Cox return (npages); 77589ea39a7SAlan Cox } 77689ea39a7SAlan Cox } 77789ea39a7SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 77889ea39a7SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 77989ea39a7SAlan Cox alt = vm_phys_free_queues[domain][flind][pind]; 78089ea39a7SAlan Cox while ((m = TAILQ_FIRST(&alt[oind].pl)) != 78189ea39a7SAlan Cox NULL) { 78289ea39a7SAlan Cox vm_freelist_rem(alt, m, oind); 78389ea39a7SAlan Cox vm_phys_set_pool(pool, m, oind); 78489ea39a7SAlan Cox avail = 1 << oind; 78589ea39a7SAlan Cox need = imin(npages - i, avail); 78689ea39a7SAlan Cox for (end = i + need; i < end;) 78789ea39a7SAlan Cox ma[i++] = m++; 78889ea39a7SAlan Cox if (need < avail) { 7897493904eSAlan Cox /* 7907493904eSAlan Cox * Return excess pages to fl. 7917493904eSAlan Cox * Its order [0, oind) queues 7927493904eSAlan Cox * are empty. 7937493904eSAlan Cox */ 7947493904eSAlan Cox vm_phys_enq_range(m, avail - 7957493904eSAlan Cox need, fl, 1); 79689ea39a7SAlan Cox return (npages); 79789ea39a7SAlan Cox } else if (i == npages) 79889ea39a7SAlan Cox return (npages); 79989ea39a7SAlan Cox } 80089ea39a7SAlan Cox } 80189ea39a7SAlan Cox } 80289ea39a7SAlan Cox } 80389ea39a7SAlan Cox return (i); 80489ea39a7SAlan Cox } 80589ea39a7SAlan Cox 80689ea39a7SAlan Cox /* 80711752d88SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages 80811752d88SAlan Cox * from the free lists. 8098941dc44SAlan Cox * 8108941dc44SAlan Cox * The free page queues must be locked. 
81111752d88SAlan Cox */ 81211752d88SAlan Cox vm_page_t 813ef435ae7SJeff Roberson vm_phys_alloc_pages(int domain, int pool, int order) 81411752d88SAlan Cox { 81549ca10d4SJayachandran C. vm_page_t m; 8160db2102aSMichael Zhilin int freelist; 81749ca10d4SJayachandran C. 8180db2102aSMichael Zhilin for (freelist = 0; freelist < VM_NFREELIST; freelist++) { 8190db2102aSMichael Zhilin m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order); 82049ca10d4SJayachandran C. if (m != NULL) 82149ca10d4SJayachandran C. return (m); 82249ca10d4SJayachandran C. } 82349ca10d4SJayachandran C. return (NULL); 82449ca10d4SJayachandran C. } 82549ca10d4SJayachandran C. 82649ca10d4SJayachandran C. /* 827d866a563SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages from the 828d866a563SAlan Cox * specified free list. The free list must be specified using one of the 829d866a563SAlan Cox * manifest constants VM_FREELIST_*. 830d866a563SAlan Cox * 831d866a563SAlan Cox * The free page queues must be locked. 83249ca10d4SJayachandran C. */ 83349ca10d4SJayachandran C. vm_page_t 8340db2102aSMichael Zhilin vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order) 83549ca10d4SJayachandran C. { 836ef435ae7SJeff Roberson struct vm_freelist *alt, *fl; 83711752d88SAlan Cox vm_page_t m; 8380db2102aSMichael Zhilin int oind, pind, flind; 83911752d88SAlan Cox 840ef435ae7SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 841ef435ae7SJeff Roberson ("vm_phys_alloc_freelist_pages: domain %d is out of range", 842ef435ae7SJeff Roberson domain)); 8430db2102aSMichael Zhilin KASSERT(freelist < VM_NFREELIST, 844d866a563SAlan Cox ("vm_phys_alloc_freelist_pages: freelist %d is out of range", 8455be93778SAndrew Turner freelist)); 84611752d88SAlan Cox KASSERT(pool < VM_NFREEPOOL, 84749ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); 84811752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 84949ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 8506520495aSAdrian Chadd 8510db2102aSMichael Zhilin flind = vm_freelist_to_flind[freelist]; 8520db2102aSMichael Zhilin /* Check if freelist is present */ 8530db2102aSMichael Zhilin if (flind < 0) 8540db2102aSMichael Zhilin return (NULL); 8550db2102aSMichael Zhilin 856e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(domain)); 8577e226537SAttilio Rao fl = &vm_phys_free_queues[domain][flind][pool][0]; 85811752d88SAlan Cox for (oind = order; oind < VM_NFREEORDER; oind++) { 85911752d88SAlan Cox m = TAILQ_FIRST(&fl[oind].pl); 86011752d88SAlan Cox if (m != NULL) { 8617e226537SAttilio Rao vm_freelist_rem(fl, m, oind); 862370a338aSAlan Cox /* The order [order, oind) queues are empty. */ 863370a338aSAlan Cox vm_phys_split_pages(m, oind, fl, order, 1); 86411752d88SAlan Cox return (m); 86511752d88SAlan Cox } 86611752d88SAlan Cox } 86711752d88SAlan Cox 86811752d88SAlan Cox /* 86911752d88SAlan Cox * The given pool was empty. Find the largest 87011752d88SAlan Cox * contiguous, power-of-two-sized set of pages in any 87111752d88SAlan Cox * pool. Transfer these pages to the given pool, and 87211752d88SAlan Cox * use them to satisfy the allocation. 
87311752d88SAlan Cox */ 87411752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 87511752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 8767e226537SAttilio Rao alt = &vm_phys_free_queues[domain][flind][pind][0]; 87711752d88SAlan Cox m = TAILQ_FIRST(&alt[oind].pl); 87811752d88SAlan Cox if (m != NULL) { 8797e226537SAttilio Rao vm_freelist_rem(alt, m, oind); 88011752d88SAlan Cox vm_phys_set_pool(pool, m, oind); 881370a338aSAlan Cox /* The order [order, oind) queues are empty. */ 882370a338aSAlan Cox vm_phys_split_pages(m, oind, fl, order, 1); 88311752d88SAlan Cox return (m); 88411752d88SAlan Cox } 88511752d88SAlan Cox } 88611752d88SAlan Cox } 88711752d88SAlan Cox return (NULL); 88811752d88SAlan Cox } 88911752d88SAlan Cox 89011752d88SAlan Cox /* 89111752d88SAlan Cox * Find the vm_page corresponding to the given physical address. 89211752d88SAlan Cox */ 89311752d88SAlan Cox vm_page_t 89411752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa) 89511752d88SAlan Cox { 89611752d88SAlan Cox struct vm_phys_seg *seg; 89711752d88SAlan Cox int segind; 89811752d88SAlan Cox 89911752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 90011752d88SAlan Cox seg = &vm_phys_segs[segind]; 90111752d88SAlan Cox if (pa >= seg->start && pa < seg->end) 90211752d88SAlan Cox return (&seg->first_page[atop(pa - seg->start)]); 90311752d88SAlan Cox } 904f06a3a36SAndrew Thompson return (NULL); 90511752d88SAlan Cox } 90611752d88SAlan Cox 907b6de32bdSKonstantin Belousov vm_page_t 908b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa) 909b6de32bdSKonstantin Belousov { 91038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg tmp, *seg; 911b6de32bdSKonstantin Belousov vm_page_t m; 912b6de32bdSKonstantin Belousov 913b6de32bdSKonstantin Belousov m = NULL; 91438d6b2dcSRoger Pau Monné tmp.start = pa; 91538d6b2dcSRoger Pau Monné tmp.end = 0; 91638d6b2dcSRoger Pau Monné 91738d6b2dcSRoger Pau Monné rw_rlock(&vm_phys_fictitious_reg_lock); 91838d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 91938d6b2dcSRoger Pau Monné rw_runlock(&vm_phys_fictitious_reg_lock); 92038d6b2dcSRoger Pau Monné if (seg == NULL) 92138d6b2dcSRoger Pau Monné return (NULL); 92238d6b2dcSRoger Pau Monné 923b6de32bdSKonstantin Belousov m = &seg->first_page[atop(pa - seg->start)]; 92438d6b2dcSRoger Pau Monné KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); 92538d6b2dcSRoger Pau Monné 926b6de32bdSKonstantin Belousov return (m); 927b6de32bdSKonstantin Belousov } 928b6de32bdSKonstantin Belousov 9295ebe728dSRoger Pau Monné static inline void 9305ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, 9315ebe728dSRoger Pau Monné long page_count, vm_memattr_t memattr) 9325ebe728dSRoger Pau Monné { 9335ebe728dSRoger Pau Monné long i; 9345ebe728dSRoger Pau Monné 935f93f7cf1SMark Johnston bzero(range, page_count * sizeof(*range)); 9365ebe728dSRoger Pau Monné for (i = 0; i < page_count; i++) { 9375ebe728dSRoger Pau Monné vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); 9385ebe728dSRoger Pau Monné range[i].oflags &= ~VPO_UNMANAGED; 9395ebe728dSRoger Pau Monné range[i].busy_lock = VPB_UNBUSIED; 9405ebe728dSRoger Pau Monné } 9415ebe728dSRoger Pau Monné } 9425ebe728dSRoger Pau Monné 943b6de32bdSKonstantin Belousov int 944b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, 945b6de32bdSKonstantin Belousov vm_memattr_t memattr) 946b6de32bdSKonstantin Belousov { 
947b6de32bdSKonstantin Belousov struct vm_phys_fictitious_seg *seg; 948b6de32bdSKonstantin Belousov vm_page_t fp; 9495ebe728dSRoger Pau Monné long page_count; 950b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 9515ebe728dSRoger Pau Monné long pi, pe; 9525ebe728dSRoger Pau Monné long dpage_count; 953b6de32bdSKonstantin Belousov #endif 954b6de32bdSKonstantin Belousov 9555ebe728dSRoger Pau Monné KASSERT(start < end, 9565ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 9575ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 9585ebe728dSRoger Pau Monné 959b6de32bdSKonstantin Belousov page_count = (end - start) / PAGE_SIZE; 960b6de32bdSKonstantin Belousov 961b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 962b6de32bdSKonstantin Belousov pi = atop(start); 9635ebe728dSRoger Pau Monné pe = atop(end); 9645ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 965b6de32bdSKonstantin Belousov fp = &vm_page_array[pi - first_page]; 9665ebe728dSRoger Pau Monné if ((pe - first_page) > vm_page_array_size) { 9675ebe728dSRoger Pau Monné /* 9685ebe728dSRoger Pau Monné * We have a segment that starts inside 9695ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 9705ebe728dSRoger Pau Monné * 9715ebe728dSRoger Pau Monné * Use vm_page_array pages for those that are 9725ebe728dSRoger Pau Monné * inside of the vm_page_array range, and 9735ebe728dSRoger Pau Monné * allocate the remaining ones. 9745ebe728dSRoger Pau Monné */ 9755ebe728dSRoger Pau Monné dpage_count = vm_page_array_size - (pi - first_page); 9765ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, dpage_count, 9775ebe728dSRoger Pau Monné memattr); 9785ebe728dSRoger Pau Monné page_count -= dpage_count; 9795ebe728dSRoger Pau Monné start += ptoa(dpage_count); 9805ebe728dSRoger Pau Monné goto alloc; 9815ebe728dSRoger Pau Monné } 9825ebe728dSRoger Pau Monné /* 9835ebe728dSRoger Pau Monné * We can allocate the full range from vm_page_array, 9845ebe728dSRoger Pau Monné * so there's no need to register the range in the tree. 9855ebe728dSRoger Pau Monné */ 9865ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 9875ebe728dSRoger Pau Monné return (0); 9885ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 9895ebe728dSRoger Pau Monné /* 9905ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 9915ebe728dSRoger Pau Monné * but starts outside of it. 9925ebe728dSRoger Pau Monné */ 9935ebe728dSRoger Pau Monné fp = &vm_page_array[0]; 9945ebe728dSRoger Pau Monné dpage_count = pe - first_page; 9955ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, 9965ebe728dSRoger Pau Monné memattr); 9975ebe728dSRoger Pau Monné end -= ptoa(dpage_count); 9985ebe728dSRoger Pau Monné page_count -= dpage_count; 9995ebe728dSRoger Pau Monné goto alloc; 10005ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 10015ebe728dSRoger Pau Monné /* 10025ebe728dSRoger Pau Monné * Trying to register a fictitious range that expands before 10035ebe728dSRoger Pau Monné * and after vm_page_array. 
10045ebe728dSRoger Pau Monné */ 10055ebe728dSRoger Pau Monné return (EINVAL); 10065ebe728dSRoger Pau Monné } else { 10075ebe728dSRoger Pau Monné alloc: 1008b6de32bdSKonstantin Belousov #endif 1009b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 1010f93f7cf1SMark Johnston M_WAITOK); 10115ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE 1012b6de32bdSKonstantin Belousov } 10135ebe728dSRoger Pau Monné #endif 10145ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 101538d6b2dcSRoger Pau Monné 101638d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 1017b6de32bdSKonstantin Belousov seg->start = start; 1018b6de32bdSKonstantin Belousov seg->end = end; 1019b6de32bdSKonstantin Belousov seg->first_page = fp; 102038d6b2dcSRoger Pau Monné 102138d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 102238d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 102338d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 102438d6b2dcSRoger Pau Monné 1025b6de32bdSKonstantin Belousov return (0); 1026b6de32bdSKonstantin Belousov } 1027b6de32bdSKonstantin Belousov 1028b6de32bdSKonstantin Belousov void 1029b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 1030b6de32bdSKonstantin Belousov { 103138d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp; 1032b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 10335ebe728dSRoger Pau Monné long pi, pe; 1034b6de32bdSKonstantin Belousov #endif 1035b6de32bdSKonstantin Belousov 10365ebe728dSRoger Pau Monné KASSERT(start < end, 10375ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 10385ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 10395ebe728dSRoger Pau Monné 1040b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 1041b6de32bdSKonstantin Belousov pi = atop(start); 10425ebe728dSRoger Pau Monné pe = atop(end); 10435ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 10445ebe728dSRoger Pau Monné if ((pe - first_page) <= vm_page_array_size) { 10455ebe728dSRoger Pau Monné /* 10465ebe728dSRoger Pau Monné * This segment was allocated using vm_page_array 10475ebe728dSRoger Pau Monné * only, there's nothing to do since those pages 10485ebe728dSRoger Pau Monné * were never added to the tree. 10495ebe728dSRoger Pau Monné */ 10505ebe728dSRoger Pau Monné return; 10515ebe728dSRoger Pau Monné } 10525ebe728dSRoger Pau Monné /* 10535ebe728dSRoger Pau Monné * We have a segment that starts inside 10545ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 10555ebe728dSRoger Pau Monné * 10565ebe728dSRoger Pau Monné * Calculate how many pages were added to the 10575ebe728dSRoger Pau Monné * tree and free them. 10585ebe728dSRoger Pau Monné */ 10595ebe728dSRoger Pau Monné start = ptoa(first_page + vm_page_array_size); 10605ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 10615ebe728dSRoger Pau Monné /* 10625ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 10635ebe728dSRoger Pau Monné * but starts outside of it. 10645ebe728dSRoger Pau Monné */ 10655ebe728dSRoger Pau Monné end = ptoa(first_page); 10665ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 10675ebe728dSRoger Pau Monné /* Since it's not possible to register such a range, panic. 
*/ 10685ebe728dSRoger Pau Monné panic( 10695ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 10705ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 10715ebe728dSRoger Pau Monné } 1072b6de32bdSKonstantin Belousov #endif 107338d6b2dcSRoger Pau Monné tmp.start = start; 107438d6b2dcSRoger Pau Monné tmp.end = 0; 1075b6de32bdSKonstantin Belousov 107638d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 107738d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 107838d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 107938d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 108038d6b2dcSRoger Pau Monné panic( 108138d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 108238d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 108338d6b2dcSRoger Pau Monné } 108438d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 108538d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 108638d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 108738d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 1088b6de32bdSKonstantin Belousov } 1089b6de32bdSKonstantin Belousov 109011752d88SAlan Cox /* 109111752d88SAlan Cox * Free a contiguous, power of two-sized set of physical pages. 10928941dc44SAlan Cox * 10938941dc44SAlan Cox * The free page queues must be locked. 109411752d88SAlan Cox */ 109511752d88SAlan Cox void 109611752d88SAlan Cox vm_phys_free_pages(vm_page_t m, int order) 109711752d88SAlan Cox { 109811752d88SAlan Cox struct vm_freelist *fl; 109911752d88SAlan Cox struct vm_phys_seg *seg; 11005c1f2cc4SAlan Cox vm_paddr_t pa; 110111752d88SAlan Cox vm_page_t m_buddy; 110211752d88SAlan Cox 110311752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER, 1104*b7565d44SJeff Roberson ("vm_phys_free_pages: page %p(%p) has unexpected order %d", 1105*b7565d44SJeff Roberson m, (void *)m->phys_addr, m->order)); 110611752d88SAlan Cox KASSERT(m->pool < VM_NFREEPOOL, 11078941dc44SAlan Cox ("vm_phys_free_pages: page %p has unexpected pool %d", 110811752d88SAlan Cox m, m->pool)); 110911752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 11108941dc44SAlan Cox ("vm_phys_free_pages: order %d is out of range", order)); 111111752d88SAlan Cox seg = &vm_phys_segs[m->segind]; 1112e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 11135c1f2cc4SAlan Cox if (order < VM_NFREEORDER - 1) { 11145c1f2cc4SAlan Cox pa = VM_PAGE_TO_PHYS(m); 11155c1f2cc4SAlan Cox do { 11165c1f2cc4SAlan Cox pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); 11175c1f2cc4SAlan Cox if (pa < seg->start || pa >= seg->end) 111811752d88SAlan Cox break; 11195c1f2cc4SAlan Cox m_buddy = &seg->first_page[atop(pa - seg->start)]; 112011752d88SAlan Cox if (m_buddy->order != order) 112111752d88SAlan Cox break; 112211752d88SAlan Cox fl = (*seg->free_queues)[m_buddy->pool]; 11237e226537SAttilio Rao vm_freelist_rem(fl, m_buddy, order); 112411752d88SAlan Cox if (m_buddy->pool != m->pool) 112511752d88SAlan Cox vm_phys_set_pool(m->pool, m_buddy, order); 112611752d88SAlan Cox order++; 11275c1f2cc4SAlan Cox pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); 112811752d88SAlan Cox m = &seg->first_page[atop(pa - seg->start)]; 11295c1f2cc4SAlan Cox } while (order < VM_NFREEORDER - 1); 113011752d88SAlan Cox } 113111752d88SAlan Cox fl = (*seg->free_queues)[m->pool]; 11327e226537SAttilio Rao vm_freelist_add(fl, m, order, 1); 113311752d88SAlan Cox } 113411752d88SAlan Cox 113511752d88SAlan Cox /* 
1136b8590daeSDoug Moore * Return the largest possible order of a set of pages starting at m. 11375c1f2cc4SAlan Cox */ 1138b8590daeSDoug Moore static int 1139b8590daeSDoug Moore max_order(vm_page_t m) 11405c1f2cc4SAlan Cox { 11415c1f2cc4SAlan Cox 11425c1f2cc4SAlan Cox /* 11435c1f2cc4SAlan Cox * Unsigned "min" is used here so that "order" is assigned 11445c1f2cc4SAlan Cox * "VM_NFREEORDER - 1" when "m"'s physical address is zero 11455c1f2cc4SAlan Cox * or the low-order bits of its physical address are zero 11465c1f2cc4SAlan Cox * because the size of a physical address exceeds the size of 11475c1f2cc4SAlan Cox * a long. 11485c1f2cc4SAlan Cox */ 1149b8590daeSDoug Moore return (min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, 1150b8590daeSDoug Moore VM_NFREEORDER - 1)); 11515c1f2cc4SAlan Cox } 1152b8590daeSDoug Moore 1153b8590daeSDoug Moore /* 1154b8590daeSDoug Moore * Free a contiguous, arbitrarily sized set of physical pages, without 1155b8590daeSDoug Moore * merging across set boundaries. 1156b8590daeSDoug Moore * 1157b8590daeSDoug Moore * The free page queues must be locked. 1158b8590daeSDoug Moore */ 1159b8590daeSDoug Moore void 1160b8590daeSDoug Moore vm_phys_enqueue_contig(vm_page_t m, u_long npages) 1161b8590daeSDoug Moore { 1162b8590daeSDoug Moore struct vm_freelist *fl; 1163b8590daeSDoug Moore struct vm_phys_seg *seg; 1164b8590daeSDoug Moore vm_page_t m_end; 1165b8590daeSDoug Moore int order; 1166b8590daeSDoug Moore 1167b8590daeSDoug Moore /* 1168b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages in the largest 1169b8590daeSDoug Moore * possible power-of-two-sized subsets. 1170b8590daeSDoug Moore */ 1171b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1172b8590daeSDoug Moore seg = &vm_phys_segs[m->segind]; 1173b8590daeSDoug Moore fl = (*seg->free_queues)[m->pool]; 1174b8590daeSDoug Moore m_end = m + npages; 1175b8590daeSDoug Moore /* Free blocks of increasing size. */ 1176b8590daeSDoug Moore while ((order = max_order(m)) < VM_NFREEORDER - 1 && 1177b8590daeSDoug Moore m + (1 << order) <= m_end) { 1178b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind], 1179b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments", 1180b8590daeSDoug Moore __func__, m_end - npages, m)); 1181b8590daeSDoug Moore vm_freelist_add(fl, m, order, 1); 1182b8590daeSDoug Moore m += 1 << order; 11835c1f2cc4SAlan Cox } 1184b8590daeSDoug Moore /* Free blocks of maximum size. */ 1185b8590daeSDoug Moore while (m + (1 << order) <= m_end) { 1186b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind], 1187b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments", 1188b8590daeSDoug Moore __func__, m_end - npages, m)); 1189b8590daeSDoug Moore vm_freelist_add(fl, m, order, 1); 1190b8590daeSDoug Moore m += 1 << order; 1191b8590daeSDoug Moore } 1192b8590daeSDoug Moore /* Free blocks of diminishing size. */ 1193b8590daeSDoug Moore while (m < m_end) { 1194b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind], 1195b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments", 1196b8590daeSDoug Moore __func__, m_end - npages, m)); 1197b8590daeSDoug Moore order = flsl(m_end - m) - 1; 1198b8590daeSDoug Moore vm_freelist_add(fl, m, order, 1); 1199b8590daeSDoug Moore m += 1 << order; 1200b8590daeSDoug Moore } 1201b8590daeSDoug Moore } 1202b8590daeSDoug Moore 1203b8590daeSDoug Moore /* 1204b8590daeSDoug Moore * Free a contiguous, arbitrarily sized set of physical pages. 
1205b8590daeSDoug Moore * 1206b8590daeSDoug Moore * The free page queues must be locked. 1207b8590daeSDoug Moore */ 1208b8590daeSDoug Moore void 1209b8590daeSDoug Moore vm_phys_free_contig(vm_page_t m, u_long npages) 1210b8590daeSDoug Moore { 1211b8590daeSDoug Moore int order_start, order_end; 1212b8590daeSDoug Moore vm_page_t m_start, m_end; 1213b8590daeSDoug Moore 1214b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1215b8590daeSDoug Moore 1216b8590daeSDoug Moore m_start = m; 1217b8590daeSDoug Moore order_start = max_order(m_start); 1218b8590daeSDoug Moore if (order_start < VM_NFREEORDER - 1) 1219b8590daeSDoug Moore m_start += 1 << order_start; 1220b8590daeSDoug Moore m_end = m + npages; 1221b8590daeSDoug Moore order_end = max_order(m_end); 1222b8590daeSDoug Moore if (order_end < VM_NFREEORDER - 1) 1223b8590daeSDoug Moore m_end -= 1 << order_end; 1224b8590daeSDoug Moore /* 1225b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages at the start and 1226b8590daeSDoug Moore * end of the range last. 1227b8590daeSDoug Moore */ 1228b8590daeSDoug Moore if (m_start < m_end) 1229b8590daeSDoug Moore vm_phys_enqueue_contig(m_start, m_end - m_start); 1230b8590daeSDoug Moore if (order_start < VM_NFREEORDER - 1) 1231b8590daeSDoug Moore vm_phys_free_pages(m, order_start); 1232b8590daeSDoug Moore if (order_end < VM_NFREEORDER - 1) 1233b8590daeSDoug Moore vm_phys_free_pages(m_end, order_end); 12345c1f2cc4SAlan Cox } 12355c1f2cc4SAlan Cox 12365c1f2cc4SAlan Cox /* 1237c869e672SAlan Cox * Scan physical memory between the specified addresses "low" and "high" for a 1238c869e672SAlan Cox * run of contiguous physical pages that satisfy the specified conditions, and 1239c869e672SAlan Cox * return the lowest page in the run. The specified "alignment" determines 1240c869e672SAlan Cox * the alignment of the lowest physical page in the run. If the specified 1241c869e672SAlan Cox * "boundary" is non-zero, then the run of physical pages cannot span a 1242c869e672SAlan Cox * physical address that is a multiple of "boundary". 1243c869e672SAlan Cox * 1244c869e672SAlan Cox * "npages" must be greater than zero. Both "alignment" and "boundary" must 1245c869e672SAlan Cox * be a power of two. 
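 *
 * Illustrative call (editorial; the domain, sizes and the options value
 * are hypothetical):
 *
 *	m = vm_phys_scan_contig(0, 512, 0, (vm_paddr_t)4 << 30,
 *	    2 * 1024 * 1024, 0, 0);
 *
 * looks in domain 0 for a run of 512 contiguous pages below 4 GB whose
 * first page is 2 MB-aligned, with no boundary restriction.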
1246c869e672SAlan Cox */ 1247c869e672SAlan Cox vm_page_t 12483f289c3fSJeff Roberson vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 1249c869e672SAlan Cox u_long alignment, vm_paddr_t boundary, int options) 1250c869e672SAlan Cox { 1251c869e672SAlan Cox vm_paddr_t pa_end; 1252c869e672SAlan Cox vm_page_t m_end, m_run, m_start; 1253c869e672SAlan Cox struct vm_phys_seg *seg; 1254c869e672SAlan Cox int segind; 1255c869e672SAlan Cox 1256c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1257c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1258c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1259c869e672SAlan Cox if (low >= high) 1260c869e672SAlan Cox return (NULL); 1261c869e672SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 1262c869e672SAlan Cox seg = &vm_phys_segs[segind]; 12633f289c3fSJeff Roberson if (seg->domain != domain) 12643f289c3fSJeff Roberson continue; 1265c869e672SAlan Cox if (seg->start >= high) 1266c869e672SAlan Cox break; 1267c869e672SAlan Cox if (low >= seg->end) 1268c869e672SAlan Cox continue; 1269c869e672SAlan Cox if (low <= seg->start) 1270c869e672SAlan Cox m_start = seg->first_page; 1271c869e672SAlan Cox else 1272c869e672SAlan Cox m_start = &seg->first_page[atop(low - seg->start)]; 1273c869e672SAlan Cox if (high < seg->end) 1274c869e672SAlan Cox pa_end = high; 1275c869e672SAlan Cox else 1276c869e672SAlan Cox pa_end = seg->end; 1277c869e672SAlan Cox if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages)) 1278c869e672SAlan Cox continue; 1279c869e672SAlan Cox m_end = &seg->first_page[atop(pa_end - seg->start)]; 1280c869e672SAlan Cox m_run = vm_page_scan_contig(npages, m_start, m_end, 1281c869e672SAlan Cox alignment, boundary, options); 1282c869e672SAlan Cox if (m_run != NULL) 1283c869e672SAlan Cox return (m_run); 1284c869e672SAlan Cox } 1285c869e672SAlan Cox return (NULL); 1286c869e672SAlan Cox } 1287c869e672SAlan Cox 1288c869e672SAlan Cox /* 128911752d88SAlan Cox * Set the pool for a contiguous, power of two-sized set of physical pages. 129011752d88SAlan Cox */ 12917bfda801SAlan Cox void 129211752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order) 129311752d88SAlan Cox { 129411752d88SAlan Cox vm_page_t m_tmp; 129511752d88SAlan Cox 129611752d88SAlan Cox for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 129711752d88SAlan Cox m_tmp->pool = pool; 129811752d88SAlan Cox } 129911752d88SAlan Cox 130011752d88SAlan Cox /* 13019742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 13029742373aSAlan Cox * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 13039742373aSAlan Cox * FALSE, indicating that "m" is not in the free lists. 13047bfda801SAlan Cox * 13057bfda801SAlan Cox * The free page queues must be locked. 13067bfda801SAlan Cox */ 1307e35395ceSAlan Cox boolean_t 13087bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m) 13097bfda801SAlan Cox { 13107bfda801SAlan Cox struct vm_freelist *fl; 13117bfda801SAlan Cox struct vm_phys_seg *seg; 13127bfda801SAlan Cox vm_paddr_t pa, pa_half; 13137bfda801SAlan Cox vm_page_t m_set, m_tmp; 13147bfda801SAlan Cox int order; 13157bfda801SAlan Cox 13167bfda801SAlan Cox /* 13177bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 13187bfda801SAlan Cox * physical pages containing the given physical page "m" and 13197bfda801SAlan Cox * assign it to "m_set". 
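	 *
	 * (Editorial example with hypothetical addresses: if "m" sits at
	 * 0x3000 inside a free order-2 block beginning at 0x0000, the loop
	 * below masks successively more low-order bits of 0x3000 -- first
	 * to 0x2000, then to 0x0000 -- until it reaches the first page of
	 * the free block, the only page whose "order" field is not
	 * VM_NFREEORDER.)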
13207bfda801SAlan Cox */ 13217bfda801SAlan Cox seg = &vm_phys_segs[m->segind]; 1322e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 13237bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1324bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 13257bfda801SAlan Cox order++; 13267bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 13272fbced65SAlan Cox if (pa >= seg->start) 13287bfda801SAlan Cox m_set = &seg->first_page[atop(pa - seg->start)]; 1329e35395ceSAlan Cox else 1330e35395ceSAlan Cox return (FALSE); 13317bfda801SAlan Cox } 1332e35395ceSAlan Cox if (m_set->order < order) 1333e35395ceSAlan Cox return (FALSE); 1334e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 1335e35395ceSAlan Cox return (FALSE); 13367bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 13377bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 13387bfda801SAlan Cox m_set, m_set->order)); 13397bfda801SAlan Cox 13407bfda801SAlan Cox /* 13417bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 13427bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 13437bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 13447bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 13457bfda801SAlan Cox */ 13467bfda801SAlan Cox fl = (*seg->free_queues)[m_set->pool]; 13477bfda801SAlan Cox order = m_set->order; 13487e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 13497bfda801SAlan Cox while (order > 0) { 13507bfda801SAlan Cox order--; 13517bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 13527bfda801SAlan Cox if (m->phys_addr < pa_half) 13537bfda801SAlan Cox m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 13547bfda801SAlan Cox else { 13557bfda801SAlan Cox m_tmp = m_set; 13567bfda801SAlan Cox m_set = &seg->first_page[atop(pa_half - seg->start)]; 13577bfda801SAlan Cox } 13587e226537SAttilio Rao vm_freelist_add(fl, m_tmp, order, 0); 13597bfda801SAlan Cox } 13607bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 1361e35395ceSAlan Cox return (TRUE); 13627bfda801SAlan Cox } 13637bfda801SAlan Cox 13647bfda801SAlan Cox /* 13652f9f48d6SAlan Cox * Allocate a contiguous set of physical pages of the given size 13662f9f48d6SAlan Cox * "npages" from the free lists. All of the physical pages must be at 13672f9f48d6SAlan Cox * or above the given physical address "low" and below the given 13682f9f48d6SAlan Cox * physical address "high". The given value "alignment" determines the 13692f9f48d6SAlan Cox * alignment of the first physical page in the set. If the given value 13702f9f48d6SAlan Cox * "boundary" is non-zero, then the set of physical pages cannot cross 13712f9f48d6SAlan Cox * any physical address boundary that is a multiple of that value. Both 137211752d88SAlan Cox * "alignment" and "boundary" must be a power of two. 
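 *
 * Illustrative call (editorial; the domain and sizes are hypothetical, and
 * the caller must hold the domain's free queue lock):
 *
 *	vm_domain_free_lock(VM_DOMAIN(0));
 *	m = vm_phys_alloc_contig(0, 4096, 0, (vm_paddr_t)4 << 30,
 *	    2 * 1024 * 1024, 0);
 *	vm_domain_free_unlock(VM_DOMAIN(0));
 *
 * requests 16 MB of physically contiguous memory below 4 GB, aligned to
 * 2 MB, from domain 0; NULL is returned if no suitable run is free.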
137311752d88SAlan Cox */ 137411752d88SAlan Cox vm_page_t 1375ef435ae7SJeff Roberson vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 13765c1f2cc4SAlan Cox u_long alignment, vm_paddr_t boundary) 137711752d88SAlan Cox { 1378c869e672SAlan Cox vm_paddr_t pa_end, pa_start; 1379c869e672SAlan Cox vm_page_t m_run; 1380c869e672SAlan Cox struct vm_phys_seg *seg; 1381ef435ae7SJeff Roberson int segind; 138211752d88SAlan Cox 1383c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1384c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1385c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1386e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(domain)); 1387c869e672SAlan Cox if (low >= high) 1388c869e672SAlan Cox return (NULL); 1389c869e672SAlan Cox m_run = NULL; 1390477bffbeSAlan Cox for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 1391c869e672SAlan Cox seg = &vm_phys_segs[segind]; 1392477bffbeSAlan Cox if (seg->start >= high || seg->domain != domain) 139311752d88SAlan Cox continue; 1394477bffbeSAlan Cox if (low >= seg->end) 1395477bffbeSAlan Cox break; 1396c869e672SAlan Cox if (low <= seg->start) 1397c869e672SAlan Cox pa_start = seg->start; 1398c869e672SAlan Cox else 1399c869e672SAlan Cox pa_start = low; 1400c869e672SAlan Cox if (high < seg->end) 1401c869e672SAlan Cox pa_end = high; 1402c869e672SAlan Cox else 1403c869e672SAlan Cox pa_end = seg->end; 1404c869e672SAlan Cox if (pa_end - pa_start < ptoa(npages)) 1405c869e672SAlan Cox continue; 1406c869e672SAlan Cox m_run = vm_phys_alloc_seg_contig(seg, npages, low, high, 1407c869e672SAlan Cox alignment, boundary); 1408c869e672SAlan Cox if (m_run != NULL) 1409c869e672SAlan Cox break; 1410c869e672SAlan Cox } 1411c869e672SAlan Cox return (m_run); 1412c869e672SAlan Cox } 141311752d88SAlan Cox 141411752d88SAlan Cox /* 1415c869e672SAlan Cox * Allocate a run of contiguous physical pages from the free list for the 1416c869e672SAlan Cox * specified segment. 1417c869e672SAlan Cox */ 1418c869e672SAlan Cox static vm_page_t 1419c869e672SAlan Cox vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, 1420c869e672SAlan Cox vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 1421c869e672SAlan Cox { 1422c869e672SAlan Cox struct vm_freelist *fl; 1423c869e672SAlan Cox vm_paddr_t pa, pa_end, size; 1424c869e672SAlan Cox vm_page_t m, m_ret; 1425c869e672SAlan Cox u_long npages_end; 1426c869e672SAlan Cox int oind, order, pind; 1427c869e672SAlan Cox 1428c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1429c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1430c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1431e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1432c869e672SAlan Cox /* Compute the queue that is the best fit for npages. */ 14339161b4deSAlan Cox order = flsl(npages - 1); 1434c869e672SAlan Cox /* Search for a run satisfying the specified conditions. 
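	 * (Editorial example: npages = 96 gives order = flsl(95) = 7, so
	 * the search below starts at the 128-page queues and moves to
	 * larger block sizes only if no such block satisfies the range,
	 * alignment and boundary constraints.)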
*/ 1435c869e672SAlan Cox size = npages << PAGE_SHIFT; 1436c869e672SAlan Cox for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; 1437c869e672SAlan Cox oind++) { 1438c869e672SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 1439c869e672SAlan Cox fl = (*seg->free_queues)[pind]; 14405cd29d0fSMark Johnston TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { 1441c869e672SAlan Cox /* 144211752d88SAlan Cox * Is the size of this allocation request 144311752d88SAlan Cox * larger than the largest block size? 144411752d88SAlan Cox */ 144511752d88SAlan Cox if (order >= VM_NFREEORDER) { 144611752d88SAlan Cox /* 1447c869e672SAlan Cox * Determine if a sufficient number of 1448c869e672SAlan Cox * subsequent blocks to satisfy the 1449c869e672SAlan Cox * allocation request are free. 145011752d88SAlan Cox */ 145111752d88SAlan Cox pa = VM_PAGE_TO_PHYS(m_ret); 1452c869e672SAlan Cox pa_end = pa + size; 145379e9552eSKonstantin Belousov if (pa_end < pa) 145479e9552eSKonstantin Belousov continue; 145511752d88SAlan Cox for (;;) { 1456c869e672SAlan Cox pa += 1 << (PAGE_SHIFT + 1457c869e672SAlan Cox VM_NFREEORDER - 1); 1458c869e672SAlan Cox if (pa >= pa_end || 1459c869e672SAlan Cox pa < seg->start || 146011752d88SAlan Cox pa >= seg->end) 146111752d88SAlan Cox break; 1462c869e672SAlan Cox m = &seg->first_page[atop(pa - 1463c869e672SAlan Cox seg->start)]; 1464c869e672SAlan Cox if (m->order != VM_NFREEORDER - 1465c869e672SAlan Cox 1) 146611752d88SAlan Cox break; 146711752d88SAlan Cox } 1468c869e672SAlan Cox /* If not, go to the next block. */ 1469c869e672SAlan Cox if (pa < pa_end) 147011752d88SAlan Cox continue; 147111752d88SAlan Cox } 147211752d88SAlan Cox 147311752d88SAlan Cox /* 1474c869e672SAlan Cox * Determine if the blocks are within the 1475c869e672SAlan Cox * given range, satisfy the given alignment, 1476c869e672SAlan Cox * and do not cross the given boundary. 147711752d88SAlan Cox */ 147811752d88SAlan Cox pa = VM_PAGE_TO_PHYS(m_ret); 1479c869e672SAlan Cox pa_end = pa + size; 1480d9c9c81cSPedro F. Giffuni if (pa >= low && pa_end <= high && 1481d9c9c81cSPedro F. Giffuni (pa & (alignment - 1)) == 0 && 1482d9c9c81cSPedro F. Giffuni rounddown2(pa ^ (pa_end - 1), boundary) == 0) 148311752d88SAlan Cox goto done; 148411752d88SAlan Cox } 148511752d88SAlan Cox } 148611752d88SAlan Cox } 148711752d88SAlan Cox return (NULL); 148811752d88SAlan Cox done: 148911752d88SAlan Cox for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 149011752d88SAlan Cox fl = (*seg->free_queues)[m->pool]; 14919161b4deSAlan Cox vm_freelist_rem(fl, m, oind); 14929161b4deSAlan Cox if (m->pool != VM_FREEPOOL_DEFAULT) 14939161b4deSAlan Cox vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind); 149411752d88SAlan Cox } 14955c1f2cc4SAlan Cox /* Return excess pages to the free lists. */ 14969161b4deSAlan Cox npages_end = roundup2(npages, 1 << oind); 14977493904eSAlan Cox if (npages < npages_end) { 14987493904eSAlan Cox fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT]; 14997493904eSAlan Cox vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0); 15007493904eSAlan Cox } 150111752d88SAlan Cox return (m_ret); 150211752d88SAlan Cox } 150311752d88SAlan Cox 1504*b7565d44SJeff Roberson /* 1505*b7565d44SJeff Roberson * Return the index of the first unused slot which may be the terminating 1506*b7565d44SJeff Roberson * entry. 
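 *
 * (Editorial note: phys_avail[] holds { start, end } pairs of page-aligned
 * physical addresses and is terminated by a pair of zero entries; for the
 * hypothetical map { 0x1000, 0x9f000, 0x100000, 0x7fe0000, 0, 0 } this
 * returns 4.)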
1507*b7565d44SJeff Roberson  */
1508*b7565d44SJeff Roberson static int
1509*b7565d44SJeff Roberson vm_phys_avail_count(void)
1510*b7565d44SJeff Roberson {
1511*b7565d44SJeff Roberson 	int i;
1512*b7565d44SJeff Roberson 
1513*b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1]; i += 2)
1514*b7565d44SJeff Roberson 		continue;
1515*b7565d44SJeff Roberson 	if (i > PHYS_AVAIL_ENTRIES)
1516*b7565d44SJeff Roberson 		panic("Improperly terminated phys_avail %d entries", i);
1517*b7565d44SJeff Roberson 
1518*b7565d44SJeff Roberson 	return (i);
1519*b7565d44SJeff Roberson }
1520*b7565d44SJeff Roberson 
1521*b7565d44SJeff Roberson /*
1522*b7565d44SJeff Roberson  * Assert that a phys_avail entry is valid.
1523*b7565d44SJeff Roberson  */
1524*b7565d44SJeff Roberson static void
1525*b7565d44SJeff Roberson vm_phys_avail_check(int i)
1526*b7565d44SJeff Roberson {
1527*b7565d44SJeff Roberson 	if (phys_avail[i] & PAGE_MASK)
1528*b7565d44SJeff Roberson 		panic("Unaligned phys_avail[%d]: %#jx", i,
1529*b7565d44SJeff Roberson 		    (intmax_t)phys_avail[i]);
1530*b7565d44SJeff Roberson 	if (phys_avail[i + 1] & PAGE_MASK)
1531*b7565d44SJeff Roberson 		panic("Unaligned phys_avail[%d + 1]: %#jx", i,
1532*b7565d44SJeff Roberson 		    (intmax_t)phys_avail[i + 1]);
1533*b7565d44SJeff Roberson 	if (phys_avail[i + 1] < phys_avail[i])
1534*b7565d44SJeff Roberson 		panic("phys_avail[%d] start %#jx > end %#jx", i,
1535*b7565d44SJeff Roberson 		    (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]);
1536*b7565d44SJeff Roberson }
1537*b7565d44SJeff Roberson 
1538*b7565d44SJeff Roberson /*
1539*b7565d44SJeff Roberson  * Return the index of an overlapping phys_avail entry or -1.
1540*b7565d44SJeff Roberson  */
1541*b7565d44SJeff Roberson static int
1542*b7565d44SJeff Roberson vm_phys_avail_find(vm_paddr_t pa)
1543*b7565d44SJeff Roberson {
1544*b7565d44SJeff Roberson 	int i;
1545*b7565d44SJeff Roberson 
1546*b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1]; i += 2)
1547*b7565d44SJeff Roberson 		if (phys_avail[i] <= pa && phys_avail[i + 1] > pa)
1548*b7565d44SJeff Roberson 			return (i);
1549*b7565d44SJeff Roberson 	return (-1);
1550*b7565d44SJeff Roberson }
1551*b7565d44SJeff Roberson 
1552*b7565d44SJeff Roberson /*
1553*b7565d44SJeff Roberson  * Return the index of the largest entry.
1554*b7565d44SJeff Roberson  */
1555*b7565d44SJeff Roberson int
1556*b7565d44SJeff Roberson vm_phys_avail_largest(void)
1557*b7565d44SJeff Roberson {
1558*b7565d44SJeff Roberson 	vm_paddr_t sz, largesz;
1559*b7565d44SJeff Roberson 	int largest;
1560*b7565d44SJeff Roberson 	int i;
1561*b7565d44SJeff Roberson 
1562*b7565d44SJeff Roberson 	largest = 0;
1563*b7565d44SJeff Roberson 	largesz = 0;
1564*b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1]; i += 2) {
1565*b7565d44SJeff Roberson 		sz = vm_phys_avail_size(i);
1566*b7565d44SJeff Roberson 		if (sz > largesz) {
1567*b7565d44SJeff Roberson 			largesz = sz;
1568*b7565d44SJeff Roberson 			largest = i;
1569*b7565d44SJeff Roberson 		}
1570*b7565d44SJeff Roberson 	}
1571*b7565d44SJeff Roberson 
1572*b7565d44SJeff Roberson 	return (largest);
1573*b7565d44SJeff Roberson }
1574*b7565d44SJeff Roberson 
1575*b7565d44SJeff Roberson vm_paddr_t
1576*b7565d44SJeff Roberson vm_phys_avail_size(int i)
1577*b7565d44SJeff Roberson {
1578*b7565d44SJeff Roberson 
1579*b7565d44SJeff Roberson 	return (phys_avail[i + 1] - phys_avail[i]);
1580*b7565d44SJeff Roberson }
1581*b7565d44SJeff Roberson 
1582*b7565d44SJeff Roberson /*
1583*b7565d44SJeff Roberson  * Split an entry at the address 'pa'. Return zero on success or errno.
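 *
 * (Editorial example: splitting the hypothetical entry { 0x100000,
 * 0x800000 } at pa = 0x400000 shifts the following entries up by one pair
 * and leaves { 0x100000, 0x400000 } and { 0x400000, 0x800000 }.)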
1584*b7565d44SJeff Roberson  */
1585*b7565d44SJeff Roberson static int
1586*b7565d44SJeff Roberson vm_phys_avail_split(vm_paddr_t pa, int i)
1587*b7565d44SJeff Roberson {
1588*b7565d44SJeff Roberson 	int cnt;
1589*b7565d44SJeff Roberson 
1590*b7565d44SJeff Roberson 	vm_phys_avail_check(i);
1591*b7565d44SJeff Roberson 	if (pa <= phys_avail[i] || pa >= phys_avail[i + 1])
1592*b7565d44SJeff Roberson 		panic("vm_phys_avail_split: invalid address");
1593*b7565d44SJeff Roberson 	cnt = vm_phys_avail_count();
1594*b7565d44SJeff Roberson 	if (cnt >= PHYS_AVAIL_ENTRIES)
1595*b7565d44SJeff Roberson 		return (ENOSPC);
1596*b7565d44SJeff Roberson 	memmove(&phys_avail[i + 2], &phys_avail[i],
1597*b7565d44SJeff Roberson 	    (cnt - i) * sizeof(phys_avail[0]));
1598*b7565d44SJeff Roberson 	phys_avail[i + 1] = pa;
1599*b7565d44SJeff Roberson 	phys_avail[i + 2] = pa;
1600*b7565d44SJeff Roberson 	vm_phys_avail_check(i);
1601*b7565d44SJeff Roberson 	vm_phys_avail_check(i + 2);
1602*b7565d44SJeff Roberson 
1603*b7565d44SJeff Roberson 	return (0);
1604*b7565d44SJeff Roberson }
1605*b7565d44SJeff Roberson 
1606*b7565d44SJeff Roberson /*
1607*b7565d44SJeff Roberson  * This routine allocates NUMA node specific memory before the page
1608*b7565d44SJeff Roberson  * allocator is bootstrapped.
1609*b7565d44SJeff Roberson  */
1610*b7565d44SJeff Roberson vm_paddr_t
1611*b7565d44SJeff Roberson vm_phys_early_alloc(int domain, size_t alloc_size)
1612*b7565d44SJeff Roberson {
1613*b7565d44SJeff Roberson 	int i, mem_index, biggestone;
1614*b7565d44SJeff Roberson 	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
1615*b7565d44SJeff Roberson 
1616*b7565d44SJeff Roberson 
1617*b7565d44SJeff Roberson 	/*
1618*b7565d44SJeff Roberson 	 * Search the mem_affinity array for the biggest address
1619*b7565d44SJeff Roberson 	 * range in the desired domain. This is used to constrain
1620*b7565d44SJeff Roberson 	 * the phys_avail selection below.
1621*b7565d44SJeff Roberson 	 */
1622*b7565d44SJeff Roberson 	biggestsize = 0;
1623*b7565d44SJeff Roberson 	mem_index = 0;
1624*b7565d44SJeff Roberson 	mem_start = 0;
1625*b7565d44SJeff Roberson 	mem_end = -1;
1626*b7565d44SJeff Roberson #ifdef NUMA
1627*b7565d44SJeff Roberson 	if (mem_affinity != NULL) {
1628*b7565d44SJeff Roberson 		for (i = 0; ; i++) {
1629*b7565d44SJeff Roberson 			size = mem_affinity[i].end - mem_affinity[i].start;
1630*b7565d44SJeff Roberson 			if (size == 0)
1631*b7565d44SJeff Roberson 				break;
1632*b7565d44SJeff Roberson 			if (mem_affinity[i].domain != domain)
1633*b7565d44SJeff Roberson 				continue;
1634*b7565d44SJeff Roberson 			if (size > biggestsize) {
1635*b7565d44SJeff Roberson 				mem_index = i;
1636*b7565d44SJeff Roberson 				biggestsize = size;
1637*b7565d44SJeff Roberson 			}
1638*b7565d44SJeff Roberson 		}
1639*b7565d44SJeff Roberson 		mem_start = mem_affinity[mem_index].start;
1640*b7565d44SJeff Roberson 		mem_end = mem_affinity[mem_index].end;
1641*b7565d44SJeff Roberson 	}
1642*b7565d44SJeff Roberson #endif
1643*b7565d44SJeff Roberson 
1644*b7565d44SJeff Roberson 	/*
1645*b7565d44SJeff Roberson 	 * Now find the biggest physical segment within the desired
1646*b7565d44SJeff Roberson 	 * NUMA domain.
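	 *
	 * (Editorial note: the loop below only considers entries whose
	 * last alloc_size bytes fall inside [mem_start, mem_end], since
	 * multi-page allocations are carved from the end of the chosen
	 * entry; single pages are taken from the front instead.)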
1647*b7565d44SJeff Roberson */ 1648*b7565d44SJeff Roberson biggestsize = 0; 1649*b7565d44SJeff Roberson biggestone = 0; 1650*b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1651*b7565d44SJeff Roberson /* skip regions that are out of range */ 1652*b7565d44SJeff Roberson if (phys_avail[i+1] - alloc_size < mem_start || 1653*b7565d44SJeff Roberson phys_avail[i+1] > mem_end) 1654*b7565d44SJeff Roberson continue; 1655*b7565d44SJeff Roberson size = vm_phys_avail_size(i); 1656*b7565d44SJeff Roberson if (size > biggestsize) { 1657*b7565d44SJeff Roberson biggestone = i; 1658*b7565d44SJeff Roberson biggestsize = size; 1659*b7565d44SJeff Roberson } 1660*b7565d44SJeff Roberson } 1661*b7565d44SJeff Roberson alloc_size = round_page(alloc_size); 1662*b7565d44SJeff Roberson 1663*b7565d44SJeff Roberson /* 1664*b7565d44SJeff Roberson * Grab single pages from the front to reduce fragmentation. 1665*b7565d44SJeff Roberson */ 1666*b7565d44SJeff Roberson if (alloc_size == PAGE_SIZE) { 1667*b7565d44SJeff Roberson pa = phys_avail[biggestone]; 1668*b7565d44SJeff Roberson phys_avail[biggestone] += PAGE_SIZE; 1669*b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 1670*b7565d44SJeff Roberson return (pa); 1671*b7565d44SJeff Roberson } 1672*b7565d44SJeff Roberson 1673*b7565d44SJeff Roberson /* 1674*b7565d44SJeff Roberson * Naturally align large allocations. 1675*b7565d44SJeff Roberson */ 1676*b7565d44SJeff Roberson align = phys_avail[biggestone + 1] & (alloc_size - 1); 1677*b7565d44SJeff Roberson if (alloc_size + align > biggestsize) 1678*b7565d44SJeff Roberson panic("cannot find a large enough size\n"); 1679*b7565d44SJeff Roberson if (align != 0 && 1680*b7565d44SJeff Roberson vm_phys_avail_split(phys_avail[biggestone + 1] - align, 1681*b7565d44SJeff Roberson biggestone) != 0) 1682*b7565d44SJeff Roberson /* Wasting memory. */ 1683*b7565d44SJeff Roberson phys_avail[biggestone + 1] -= align; 1684*b7565d44SJeff Roberson 1685*b7565d44SJeff Roberson phys_avail[biggestone + 1] -= alloc_size; 1686*b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 1687*b7565d44SJeff Roberson pa = phys_avail[biggestone + 1]; 1688*b7565d44SJeff Roberson return (pa); 1689*b7565d44SJeff Roberson } 1690*b7565d44SJeff Roberson 1691*b7565d44SJeff Roberson void 1692*b7565d44SJeff Roberson vm_phys_early_startup(void) 1693*b7565d44SJeff Roberson { 1694*b7565d44SJeff Roberson int i; 1695*b7565d44SJeff Roberson 1696*b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1697*b7565d44SJeff Roberson phys_avail[i] = round_page(phys_avail[i]); 1698*b7565d44SJeff Roberson phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 1699*b7565d44SJeff Roberson } 1700*b7565d44SJeff Roberson 1701*b7565d44SJeff Roberson #ifdef NUMA 1702*b7565d44SJeff Roberson /* Force phys_avail to be split by domain. 
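	 * (Editorial example with hypothetical addresses: if one entry
	 * spans { 0x100000, 0x200000000 } and the domain boundary sits at
	 * 0x100000000, it is split into { 0x100000, 0x100000000 } and
	 * { 0x100000000, 0x200000000 } so that no entry crosses a domain
	 * boundary.)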
*/ 1703*b7565d44SJeff Roberson if (mem_affinity != NULL) { 1704*b7565d44SJeff Roberson int idx; 1705*b7565d44SJeff Roberson 1706*b7565d44SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) { 1707*b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].start); 1708*b7565d44SJeff Roberson if (idx != -1 && 1709*b7565d44SJeff Roberson phys_avail[idx] != mem_affinity[i].start) 1710*b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].start, idx); 1711*b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].end); 1712*b7565d44SJeff Roberson if (idx != -1 && 1713*b7565d44SJeff Roberson phys_avail[idx] != mem_affinity[i].end) 1714*b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].end, idx); 1715*b7565d44SJeff Roberson } 1716*b7565d44SJeff Roberson } 1717*b7565d44SJeff Roberson #endif 1718*b7565d44SJeff Roberson } 1719*b7565d44SJeff Roberson 172011752d88SAlan Cox #ifdef DDB 172111752d88SAlan Cox /* 172211752d88SAlan Cox * Show the number of physical pages in each of the free lists. 172311752d88SAlan Cox */ 172411752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages) 172511752d88SAlan Cox { 172611752d88SAlan Cox struct vm_freelist *fl; 17277e226537SAttilio Rao int flind, oind, pind, dom; 172811752d88SAlan Cox 17297e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 17307e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom); 173111752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 173211752d88SAlan Cox db_printf("FREE LIST %d:\n" 173311752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 173411752d88SAlan Cox "\n ", flind); 173511752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 173611752d88SAlan Cox db_printf(" | POOL %d", pind); 173711752d88SAlan Cox db_printf("\n-- "); 173811752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 173911752d88SAlan Cox db_printf("-- -- "); 174011752d88SAlan Cox db_printf("--\n"); 174111752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 174211752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind, 174311752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 174411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 17457e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 174611752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt); 174711752d88SAlan Cox } 174811752d88SAlan Cox db_printf("\n"); 174911752d88SAlan Cox } 175011752d88SAlan Cox db_printf("\n"); 175111752d88SAlan Cox } 17527e226537SAttilio Rao db_printf("\n"); 17537e226537SAttilio Rao } 175411752d88SAlan Cox } 175511752d88SAlan Cox #endif 1756
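
/*
 * Editorial note: the "show freepages" command above can be invoked from
 * the kernel debugger; an abbreviated, hypothetical session looks like:
 *
 *	db> show freepages
 *	DOMAIN: 0
 *	FREE LIST 0:
 *	...
 */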