/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/tslog.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");
_Static_assert(sizeof(long long) >= sizeof(vm_paddr_t),
    "vm_paddr_t too big for ffsll, flsll.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;

static int numa_disabled;
static SYSCTL_NODE(_vm, OID_AUTO, numa, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NUMA options");
SYSCTL_INT(_vm_numa, OID_AUTO, disabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &numa_disabled, 0, "NUMA-awareness in the allocators is disabled");
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;
static struct vm_phys_seg vm_phys_early_segs[8];
static int vm_phys_early_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(&vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * These "avail lists" are globals used to communicate boot-time physical
 * memory layout to other parts of the kernel.  Each physically contiguous
 * region of memory is defined by a start address at an even index and an
 * end address at the following odd index.  Each list is terminated by a
 * pair of zero entries.
 *
 * dump_avail tells the dump code what regions to include in a crash dump, and
 * phys_avail is all of the remaining physical memory that is available for
 * the vm system.
 *
 * Initially dump_avail and phys_avail are identical.  Boot time memory
 * allocations remove extents from phys_avail that may still be included
 * in dumps.
 */
vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];
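
/*
 * Illustrative example (not part of the original source): with two usable
 * regions, [0x0000f000, 0x0a000000) and [0x0a100000, 0xc0000000), the list
 * would look like
 *
 *	phys_avail[0] = 0x0000f000;	phys_avail[1] = 0x0a000000;
 *	phys_avail[2] = 0x0a100000;	phys_avail[3] = 0xc0000000;
 *	phys_avail[4] = 0;		phys_avail[5] = 0;
 *
 * i.e., even indices hold region starts, odd indices hold region ends, and
 * a pair of zero entries terminates the list.  The addresses are made up
 * for the example.
 */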

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
static int __read_mostly vm_default_freepool;

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_free, "A",
    "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_segs, "A",
    "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_locality, "A",
    "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int pool, int tail);

static bool __diagused
vm_phys_pool_valid(int pool)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (pool == VM_FREEPOOL_LAZYINIT)
		return (false);
#endif
	return (pool >= 0 && pool < VM_NFREEPOOL);
}

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}
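
/*
 * Illustrative example (not part of the original source): a lookup done on
 * behalf of vm_phys_fictitious_to_vm_page() uses a key with end == 0, so the
 * first branch above delegates to vm_phys_fictitious_in_range().  Searching
 * for pa = 0x10002000 against a registered segment [0x10000000, 0x10004000)
 * returns 0 (a match); against [0x20000000, 0x20004000) it returns -1 and the
 * tree search continues to the left.  Inserting a new segment instead
 * compares the two [start, end) intervals and panics if they overlap.  The
 * addresses are made up for the example.
 */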

int
vm_phys_domain_match(int prefer __numa_used, vm_paddr_t low __numa_used,
    vm_paddr_t high __numa_used)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f __numa_used, int t __numa_used)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
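
/*
 * Illustrative example (not part of the original source): on a two-domain
 * system the locality table is a flat 2x2 matrix indexed as
 * mem_locality[f * vm_ndomains + t].  With ACPI SLIT-style values such as
 *
 *	{ 10, 21,
 *	  21, 10 }
 *
 * vm_phys_mem_affinity(0, 1) returns 21 and vm_phys_mem_affinity(0, 0)
 * returns 10.  The actual values are supplied by platform firmware.
 */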

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{

	m->order = order;
	m->pool = pool;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}
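
/*
 * Illustrative sketch (not part of the original source): the allocation
 * paths below typically pop the head of a per-order queue, e.g.
 *
 *	m = TAILQ_FIRST(&fl[order].pl);
 *	if (m != NULL)
 *		vm_freelist_rem(fl, m, order);
 *
 * vm_freelist_rem() resets m->order to VM_NFREEORDER to mark the chunk as
 * no longer free; vm_freelist_add() performs the inverse, recording the
 * order and pool in the first page of the chunk and bumping the per-order
 * count.
 */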

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	if (!(0 <= domain && domain < vm_ndomains))
		panic("%s: Invalid domain %d ('vm_ndomains' is %d)",
		    __func__, domain, vm_ndomains);
	if (vm_phys_nsegs >= VM_PHYSSEG_MAX)
		panic("Not enough storage for physical segments, "
		    "increase VM_PHYSSEG_MAX");

	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && seg[-1].start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
	if (seg != vm_phys_segs && seg[-1].end > start)
		panic("Overlapping physical segments: Current [%#jx,%#jx) "
		    "at index %zu, previous [%#jx,%#jx)",
		    (uintmax_t)start, (uintmax_t)end, seg - vm_phys_segs,
		    (uintmax_t)seg[-1].start, (uintmax_t)seg[-1].end);
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}
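
/*
 * Illustrative example (not part of the original source): with affinity
 * entries { [0, 4G) -> domain 0, [4G, 8G) -> domain 1 }, a call
 * vm_phys_create_seg(3G, 6G) walks the table above and creates two
 * segments, [3G, 4G) in domain 0 and [4G, 6G) in domain 1.  The address
 * values are made up for the example.
 */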

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	if ((start & PAGE_MASK) != 0)
		panic("%s: start (%jx) is not page aligned", __func__,
		    (uintmax_t)start);
	if ((end & PAGE_MASK) != 0)
		panic("%s: end (%jx) is not page aligned", __func__,
		    (uintmax_t)end);
	if (start > end)
		panic("%s: start (%jx) > end (%jx)!", __func__,
		    (uintmax_t)start, (uintmax_t)end);

	if (start == end)
		return;

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
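
/*
 * Illustrative example (not part of the original source): on a platform that
 * defines both boundaries (say VM_LOWMEM_BOUNDARY at 16M and
 * VM_DMA32_BOUNDARY at 4G; the exact values are platform dependent),
 * vm_phys_add_seg(1M, 6G) is recorded as three segments, [1M, 16M),
 * [16M, 4G), and [4G, 6G), so that each segment falls entirely within one
 * free list.
 */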

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
#if defined(VM_DMA32_NPAGES_THRESHOLD) || defined(VM_PHYSSEG_SPARSE)
	u_long npages;
#endif
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
#ifdef VM_DMA32_NPAGES_THRESHOLD
	npages = 0;
#endif
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
#ifdef VM_DMA32_NPAGES_THRESHOLD
			npages += atop(seg->end - seg->start);
#endif
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;
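	/*
	 * Illustrative example (not part of the original source): assuming
	 * VM_FREELIST_DEFAULT, VM_FREELIST_DMA32, and VM_FREELIST_LOWMEM are
	 * 0, 1, and 2 (the exact values are platform dependent) and all three
	 * lists are populated, the array goes from {1, 1, 1} to the running
	 * totals {1, 2, 3}, vm_nfreelists becomes 3, and the final decrement
	 * yields the free list indices {0, 1, 2}.
	 */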

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}
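	/*
	 * Illustrative example (not part of the original source): two
	 * segments [16M, 1G) and [1G, 4G) that ended up in the same domain
	 * with the same free_queues pointer are merged here into a single
	 * [16M, 4G) segment, shrinking vm_phys_segs by one entry.  The
	 * addresses are made up for the example.
	 */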

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

#ifdef VM_FREEPOOL_LAZYINIT
	vm_default_freepool = VM_FREEPOOL_LAZYINIT;
#else
	vm_default_freepool = VM_FREEPOOL_DEFAULT;
#endif

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains __numa_used,
    struct mem_affinity *affinity __numa_used, int *locality __numa_used)
{
#ifdef NUMA
	int i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	TUNABLE_INT_FETCH("vm.numa.disabled", &numa_disabled);
	if (numa_disabled)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective being to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int pool, int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, pool, tail);
	}
}

static void
vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{
	KASSERT(order >= 0 && order < VM_NFREEORDER,
	    ("%s: invalid order %d", __func__, order));

	vm_freelist_add(fl, m, order, pool, tail);
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(pool == VM_FREEPOOL_LAZYINIT)) {
		vm_page_t m_next;
		vm_paddr_t pa;
		int npages;

		npages = 1 << order;
		m_next = m + npages;
		pa = m->phys_addr + ptoa(npages);
		if (pa < vm_phys_segs[m->segind].end) {
			vm_page_init_page(m_next, pa, m->segind,
			    VM_FREEPOOL_LAZYINIT);
		}
	}
#endif
}
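
/*
 * Illustrative example (not part of the original source): satisfying an
 * order-1 request from an order-4 chunk (16 pages) with vm_phys_split_pages()
 * leaves the caller holding pages [0, 2) and queues the buddies as one
 * order-3 chunk (pages [8, 16)), one order-2 chunk (pages [4, 8)), and one
 * order-1 chunk (pages [2, 4)), in that order, on the given free list.
 */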

/*
 * Add the physical pages [m, m + npages) at the beginning of a power-of-two
 * aligned and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective being to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    (VM_PAGE_TO_PHYS(m) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("%s: page %p and npages %u are misaligned",
	    __func__, m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("%s: page %p has unexpected order %d",
		    __func__, m, m->order));
		order = ilog2(npages);
		KASSERT(order < VM_NFREEORDER,
		    ("%s: order %d is out of range", __func__, order));
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective being to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * If npages is zero, this function does nothing and ignores the physical page
 * parameter m.  Otherwise, the physical page m's buddy must not be free.
 */
static vm_page_t
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    ((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
	return (m);
}
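
/*
 * Illustrative example (not part of the original source): for npages = 11,
 * vm_phys_enq_beg() carves chunks of 8, 2, and 1 pages from the front
 * (order = ilog2 of the remaining count), so the start of the run must be
 * suitably aligned, while vm_phys_enq_range() carves chunks of 1, 2, and 8
 * pages (order = ffs of the remaining count), so the end of the run must be
 * suitably aligned, as the respective assertions above require.
 */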

/*
 * Complete initialization of a contiguous, power of two-sized set of physical
 * pages.
 *
 * If the pages currently belong to the lazy init pool, then the corresponding
 * page structures must be initialized.  In this case it is assumed that the
 * first page in the run has already been initialized.
 */
static void
vm_phys_finish_init(vm_page_t m, int order)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
		vm_paddr_t pa;
		int segind;

		TSENTER();
		pa = m->phys_addr + PAGE_SIZE;
		segind = m->segind;
		for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
		    m_tmp++, pa += PAGE_SIZE)
			vm_page_init_page(m_tmp, pa, segind, VM_NFREEPOOL);
		TSEXIT();
	}
#endif
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().  The allocated pages have no
 * valid pool field set.
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(vm_phys_pool_valid(pool),
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = i + (1 << oind);
				end = imin(npages, avail);
				while (i < end)
					ma[i++] = m++;
				if (i == npages) {
					/*
					 * Return excess pages to fl.  Its order
					 * [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - i, fl,
					    pool, 1);
					return (npages);
				}
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = vm_default_freepool; pind < VM_NFREEPOOL;
			    pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_finish_init(m, oind);
					avail = i + (1 << oind);
					end = imin(npages, avail);
					while (i < end)
						ma[i++] = m++;
					if (i == npages) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail - i,
						    fl, pool, 1);
						return (npages);
					}
				}
			}
		}
	}
	return (i);
}
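
/*
 * Illustrative example (not part of the original source): a request for
 * npages = 3 that finds an order-2 chunk (4 pages) at the head of a queue
 * removes the whole chunk, stores 3 of its pages in ma[], and hands the
 * single leftover page back to the free list as an order-0 chunk via
 * vm_phys_enq_range().
 */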

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
static vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(vm_phys_pool_valid(pool),
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, pool, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_finish_init(m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, pool, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address, which must lie
 * within the given physical memory segment.
 */
vm_page_t
vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg *seg, vm_paddr_t pa)
{
	KASSERT(pa >= seg->start && pa < seg->end,
	    ("%s: pa %#jx is out of range", __func__, (uintmax_t)pa));

	return (&seg->first_page[atop(pa - seg->start)]);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;

	if ((seg = vm_phys_paddr_to_seg(pa)) != NULL)
		return (vm_phys_seg_paddr_to_vm_page(seg, pa));
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
11335ebe728dSRoger Pau Monné */ 11345ebe728dSRoger Pau Monné return (EINVAL); 11355ebe728dSRoger Pau Monné } else { 11365ebe728dSRoger Pau Monné alloc: 1137b6de32bdSKonstantin Belousov #endif 1138b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 1139f93f7cf1SMark Johnston M_WAITOK); 11405ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE 1141b6de32bdSKonstantin Belousov } 11425ebe728dSRoger Pau Monné #endif 11435ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 114438d6b2dcSRoger Pau Monné 114538d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 1146b6de32bdSKonstantin Belousov seg->start = start; 1147b6de32bdSKonstantin Belousov seg->end = end; 1148b6de32bdSKonstantin Belousov seg->first_page = fp; 114938d6b2dcSRoger Pau Monné 115038d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 115138d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 115238d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 115338d6b2dcSRoger Pau Monné 1154b6de32bdSKonstantin Belousov return (0); 1155b6de32bdSKonstantin Belousov } 1156b6de32bdSKonstantin Belousov 1157b6de32bdSKonstantin Belousov void 1158b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 1159b6de32bdSKonstantin Belousov { 116038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp; 1161b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 11625ebe728dSRoger Pau Monné long pi, pe; 1163b6de32bdSKonstantin Belousov #endif 1164b6de32bdSKonstantin Belousov 11655ebe728dSRoger Pau Monné KASSERT(start < end, 11665ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 11675ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 11685ebe728dSRoger Pau Monné 1169b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 1170b6de32bdSKonstantin Belousov pi = atop(start); 11715ebe728dSRoger Pau Monné pe = atop(end); 11725ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 11735ebe728dSRoger Pau Monné if ((pe - first_page) <= vm_page_array_size) { 11745ebe728dSRoger Pau Monné /* 11755ebe728dSRoger Pau Monné * This segment was allocated using vm_page_array 11765ebe728dSRoger Pau Monné * only, there's nothing to do since those pages 11775ebe728dSRoger Pau Monné * were never added to the tree. 11785ebe728dSRoger Pau Monné */ 11795ebe728dSRoger Pau Monné return; 11805ebe728dSRoger Pau Monné } 11815ebe728dSRoger Pau Monné /* 11825ebe728dSRoger Pau Monné * We have a segment that starts inside 11835ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 11845ebe728dSRoger Pau Monné * 11855ebe728dSRoger Pau Monné * Calculate how many pages were added to the 11865ebe728dSRoger Pau Monné * tree and free them. 11875ebe728dSRoger Pau Monné */ 11885ebe728dSRoger Pau Monné start = ptoa(first_page + vm_page_array_size); 11895ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 11905ebe728dSRoger Pau Monné /* 11915ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 11925ebe728dSRoger Pau Monné * but starts outside of it. 11935ebe728dSRoger Pau Monné */ 11945ebe728dSRoger Pau Monné end = ptoa(first_page); 11955ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 11965ebe728dSRoger Pau Monné /* Since it's not possible to register such a range, panic. 
*/ 11975ebe728dSRoger Pau Monné panic( 11985ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 11995ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 12005ebe728dSRoger Pau Monné } 1201b6de32bdSKonstantin Belousov #endif 120238d6b2dcSRoger Pau Monné tmp.start = start; 120338d6b2dcSRoger Pau Monné tmp.end = 0; 1204b6de32bdSKonstantin Belousov 120538d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 120638d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 120738d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 120838d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 120938d6b2dcSRoger Pau Monné panic( 121038d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 121138d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 121238d6b2dcSRoger Pau Monné } 121338d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 121438d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 121538d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 121638d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 1217b6de32bdSKonstantin Belousov } 1218b6de32bdSKonstantin Belousov 121911752d88SAlan Cox /* 1220e3537f92SDoug Moore * Free a contiguous, power of two-sized set of physical pages. 12210078df5fSDoug Moore * The pool field in the first page determines the destination pool. 12228941dc44SAlan Cox * 12238941dc44SAlan Cox * The free page queues must be locked. 122411752d88SAlan Cox */ 122511752d88SAlan Cox void 12260078df5fSDoug Moore vm_phys_free_pages(vm_page_t m, int pool, int order) 122711752d88SAlan Cox { 122811752d88SAlan Cox struct vm_freelist *fl; 122911752d88SAlan Cox struct vm_phys_seg *seg; 12305c1f2cc4SAlan Cox vm_paddr_t pa; 123111752d88SAlan Cox vm_page_t m_buddy; 123211752d88SAlan Cox 123311752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER, 12340078df5fSDoug Moore ("%s: page %p has unexpected order %d", 12350078df5fSDoug Moore __func__, m, m->order)); 12360078df5fSDoug Moore KASSERT(vm_phys_pool_valid(pool), 12370078df5fSDoug Moore ("%s: unexpected pool param %d", __func__, pool)); 123811752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 12390078df5fSDoug Moore ("%s: order %d is out of range", __func__, order)); 124011752d88SAlan Cox seg = &vm_phys_segs[m->segind]; 1241e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 12425c1f2cc4SAlan Cox if (order < VM_NFREEORDER - 1) { 12435c1f2cc4SAlan Cox pa = VM_PAGE_TO_PHYS(m); 12445c1f2cc4SAlan Cox do { 12455c1f2cc4SAlan Cox pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); 12465c1f2cc4SAlan Cox if (pa < seg->start || pa >= seg->end) 124711752d88SAlan Cox break; 124869cbb187SMark Johnston m_buddy = vm_phys_seg_paddr_to_vm_page(seg, pa); 124911752d88SAlan Cox if (m_buddy->order != order) 125011752d88SAlan Cox break; 125111752d88SAlan Cox fl = (*seg->free_queues)[m_buddy->pool]; 12527e226537SAttilio Rao vm_freelist_rem(fl, m_buddy, order); 12530078df5fSDoug Moore vm_phys_finish_init(m_buddy, order); 125411752d88SAlan Cox order++; 12555c1f2cc4SAlan Cox pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); 125669cbb187SMark Johnston m = vm_phys_seg_paddr_to_vm_page(seg, pa); 12575c1f2cc4SAlan Cox } while (order < VM_NFREEORDER - 1); 125811752d88SAlan Cox } 12590078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 12600078df5fSDoug Moore vm_freelist_add(fl, m, order, pool, 1); 126111752d88SAlan Cox } 126211752d88SAlan Cox 1263b16b4c22SMark Johnston #ifdef 
VM_FREEPOOL_LAZYINIT 1264b16b4c22SMark Johnston /* 1265b16b4c22SMark Johnston * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving 1266b16b4c22SMark Johnston * them to the default pool. This is a prerequisite for some rare operations 1267b16b4c22SMark Johnston * which need to scan the page array and thus depend on all pages being 1268b16b4c22SMark Johnston * initialized. 1269b16b4c22SMark Johnston */ 1270b16b4c22SMark Johnston static void 1271b16b4c22SMark Johnston vm_phys_lazy_init_domain(int domain, bool locked) 1272b16b4c22SMark Johnston { 1273b16b4c22SMark Johnston static bool initdone[MAXMEMDOM]; 1274b16b4c22SMark Johnston struct vm_domain *vmd; 1275b16b4c22SMark Johnston struct vm_freelist *fl; 1276b16b4c22SMark Johnston vm_page_t m; 1277b16b4c22SMark Johnston int pind; 1278b16b4c22SMark Johnston bool unlocked; 1279b16b4c22SMark Johnston 1280b16b4c22SMark Johnston if (__predict_true(atomic_load_bool(&initdone[domain]))) 1281b16b4c22SMark Johnston return; 1282b16b4c22SMark Johnston 1283b16b4c22SMark Johnston vmd = VM_DOMAIN(domain); 1284b16b4c22SMark Johnston if (locked) 1285b16b4c22SMark Johnston vm_domain_free_assert_locked(vmd); 1286b16b4c22SMark Johnston else 1287b16b4c22SMark Johnston vm_domain_free_lock(vmd); 1288b16b4c22SMark Johnston if (atomic_load_bool(&initdone[domain])) 1289b16b4c22SMark Johnston goto out; 1290b16b4c22SMark Johnston pind = VM_FREEPOOL_LAZYINIT; 1291b16b4c22SMark Johnston for (int freelist = 0; freelist < VM_NFREELIST; freelist++) { 1292b16b4c22SMark Johnston int flind; 1293b16b4c22SMark Johnston 1294b16b4c22SMark Johnston flind = vm_freelist_to_flind[freelist]; 1295b16b4c22SMark Johnston if (flind < 0) 1296b16b4c22SMark Johnston continue; 1297b16b4c22SMark Johnston fl = vm_phys_free_queues[domain][flind][pind]; 1298b16b4c22SMark Johnston for (int oind = 0; oind < VM_NFREEORDER; oind++) { 1299b16b4c22SMark Johnston if (atomic_load_int(&fl[oind].lcnt) == 0) 1300b16b4c22SMark Johnston continue; 1301b16b4c22SMark Johnston while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) { 1302b16b4c22SMark Johnston /* 1303b16b4c22SMark Johnston * Avoid holding the lock across the 1304b16b4c22SMark Johnston * initialization unless there's a free page 1305b16b4c22SMark Johnston * shortage. 
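 *
 * vm_domain_allocate() effectively reserves the chunk: when it
 * succeeds, the pages are initialized with the free lock dropped
 * and the free count is restored before the chunk is handed back;
 * when it fails (a shortage), the initialization is done while
 * still holding the lock.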
1306b16b4c22SMark Johnston */ 1307b16b4c22SMark Johnston vm_freelist_rem(fl, m, oind); 1308b16b4c22SMark Johnston unlocked = vm_domain_allocate(vmd, 1309b16b4c22SMark Johnston VM_ALLOC_NORMAL, 1 << oind); 1310b16b4c22SMark Johnston if (unlocked) 1311b16b4c22SMark Johnston vm_domain_free_unlock(vmd); 13120078df5fSDoug Moore vm_phys_finish_init(m, oind); 1313b16b4c22SMark Johnston if (unlocked) { 1314b16b4c22SMark Johnston vm_domain_freecnt_inc(vmd, 1 << oind); 1315b16b4c22SMark Johnston vm_domain_free_lock(vmd); 1316b16b4c22SMark Johnston } 13170078df5fSDoug Moore vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT, 13180078df5fSDoug Moore oind); 1319b16b4c22SMark Johnston } 1320b16b4c22SMark Johnston } 1321b16b4c22SMark Johnston } 1322b16b4c22SMark Johnston atomic_store_bool(&initdone[domain], true); 1323b16b4c22SMark Johnston out: 1324b16b4c22SMark Johnston if (!locked) 1325b16b4c22SMark Johnston vm_domain_free_unlock(vmd); 1326b16b4c22SMark Johnston } 1327b16b4c22SMark Johnston 1328b16b4c22SMark Johnston static void 1329b16b4c22SMark Johnston vm_phys_lazy_init(void) 1330b16b4c22SMark Johnston { 1331b16b4c22SMark Johnston for (int domain = 0; domain < vm_ndomains; domain++) 1332b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false); 1333b16b4c22SMark Johnston atomic_store_int(&vm_default_freepool, VM_FREEPOOL_DEFAULT); 1334b16b4c22SMark Johnston } 1335b16b4c22SMark Johnston 1336b16b4c22SMark Johnston static void 1337b16b4c22SMark Johnston vm_phys_lazy_init_kthr(void *arg __unused) 1338b16b4c22SMark Johnston { 1339b16b4c22SMark Johnston vm_phys_lazy_init(); 1340b16b4c22SMark Johnston kthread_exit(); 1341b16b4c22SMark Johnston } 1342b16b4c22SMark Johnston 1343b16b4c22SMark Johnston static void 1344b16b4c22SMark Johnston vm_phys_lazy_sysinit(void *arg __unused) 1345b16b4c22SMark Johnston { 1346b16b4c22SMark Johnston struct thread *td; 1347b16b4c22SMark Johnston int error; 1348b16b4c22SMark Johnston 1349b16b4c22SMark Johnston error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td, 1350b16b4c22SMark Johnston RFSTOPPED, 0, "vmlazyinit"); 1351b16b4c22SMark Johnston if (error == 0) { 1352b16b4c22SMark Johnston thread_lock(td); 1353b16b4c22SMark Johnston sched_prio(td, PRI_MIN_IDLE); 1354b16b4c22SMark Johnston sched_add(td, SRQ_BORING); 1355b16b4c22SMark Johnston } else { 1356b16b4c22SMark Johnston printf("%s: could not create lazy init thread: %d\n", 1357b16b4c22SMark Johnston __func__, error); 1358b16b4c22SMark Johnston vm_phys_lazy_init(); 1359b16b4c22SMark Johnston } 1360b16b4c22SMark Johnston } 1361b16b4c22SMark Johnston SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit, 1362b16b4c22SMark Johnston NULL); 1363b16b4c22SMark Johnston #endif /* VM_FREEPOOL_LAZYINIT */ 1364b16b4c22SMark Johnston 136511752d88SAlan Cox /* 1366e3537f92SDoug Moore * Free a contiguous, arbitrarily sized set of physical pages, without 13670078df5fSDoug Moore * merging across set boundaries. Assumes no pages have a valid pool field. 1368b8590daeSDoug Moore * 1369b8590daeSDoug Moore * The free page queues must be locked. 
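 *
 * As an illustration (this is not an actual call site in this file),
 * a caller could return a run of "npages" pages whose pool fields
 * have never been set with:
 *
 *	vm_domain_free_lock(vm_pagequeue_domain(m));
 *	vm_phys_enqueue_contig(m, VM_FREEPOOL_DEFAULT, npages);
 *	vm_domain_free_unlock(vm_pagequeue_domain(m));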
1370b8590daeSDoug Moore */ 1371b8590daeSDoug Moore void 13720078df5fSDoug Moore vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages) 1373b8590daeSDoug Moore { 1374b8590daeSDoug Moore struct vm_freelist *fl; 1375b8590daeSDoug Moore struct vm_phys_seg *seg; 1376b8590daeSDoug Moore vm_page_t m_end; 1377c9b06fa5SDoug Moore vm_paddr_t diff, lo; 1378b8590daeSDoug Moore int order; 1379b8590daeSDoug Moore 1380b8590daeSDoug Moore /* 1381b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages in the largest 1382b8590daeSDoug Moore * possible power-of-two-sized subsets. 1383b8590daeSDoug Moore */ 1384b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1385b8590daeSDoug Moore seg = &vm_phys_segs[m->segind]; 13860078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 1387b8590daeSDoug Moore m_end = m + npages; 1388b8590daeSDoug Moore /* Free blocks of increasing size. */ 13896dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m)); 1390c9b06fa5SDoug Moore if (m < m_end && 1391c9b06fa5SDoug Moore (diff = lo ^ (lo + npages - 1)) != 0) { 1392543d55d7SDoug Moore order = min(ilog2(diff), VM_NFREEORDER - 1); 13930078df5fSDoug Moore m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, 13940078df5fSDoug Moore pool, 1); 13955c1f2cc4SAlan Cox } 1396c9b06fa5SDoug Moore 1397b8590daeSDoug Moore /* Free blocks of maximum size. */ 1398c9b06fa5SDoug Moore order = VM_NFREEORDER - 1; 1399b8590daeSDoug Moore while (m + (1 << order) <= m_end) { 1400b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind], 1401b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments", 1402b8590daeSDoug Moore __func__, m_end - npages, m)); 14030078df5fSDoug Moore vm_phys_enq_chunk(fl, m, order, pool, 1); 1404b8590daeSDoug Moore m += 1 << order; 1405b8590daeSDoug Moore } 1406b8590daeSDoug Moore /* Free blocks of diminishing size. */ 14070078df5fSDoug Moore vm_phys_enq_beg(m, m_end - m, fl, pool, 1); 1408b8590daeSDoug Moore } 1409b8590daeSDoug Moore 1410b8590daeSDoug Moore /* 1411b8590daeSDoug Moore * Free a contiguous, arbitrarily sized set of physical pages. 14120078df5fSDoug Moore * Assumes that every page but the first has no valid pool field. 14130078df5fSDoug Moore * Uses the pool value in the first page if valid, otherwise default. 1414b8590daeSDoug Moore * 1415b8590daeSDoug Moore * The free page queues must be locked. 1416b8590daeSDoug Moore */ 1417b8590daeSDoug Moore void 14180078df5fSDoug Moore vm_phys_free_contig(vm_page_t m, int pool, u_long npages) 1419b8590daeSDoug Moore { 14206dd15b7aSDoug Moore vm_paddr_t lo; 1421b8590daeSDoug Moore vm_page_t m_start, m_end; 14226dd15b7aSDoug Moore unsigned max_order, order_start, order_end; 1423b8590daeSDoug Moore 1424b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1425b8590daeSDoug Moore 14266dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m)); 1427543d55d7SDoug Moore max_order = min(ilog2(lo ^ (lo + npages)), VM_NFREEORDER - 1); 1428e3537f92SDoug Moore 1429e3537f92SDoug Moore m_start = m; 14306dd15b7aSDoug Moore order_start = ffsll(lo) - 1; 14316dd15b7aSDoug Moore if (order_start < max_order) 1432b8590daeSDoug Moore m_start += 1 << order_start; 1433e3537f92SDoug Moore m_end = m + npages; 14346dd15b7aSDoug Moore order_end = ffsll(lo + npages) - 1; 14356dd15b7aSDoug Moore if (order_end < max_order) 1436b8590daeSDoug Moore m_end -= 1 << order_end; 1437b8590daeSDoug Moore /* 1438b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages at the start and 1439b8590daeSDoug Moore * end of the range last. 
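 *
 * For example, freeing 7 pages starting at page frame 5 gives
 * lo = 5, max_order = ilog2(5 ^ 12) = 3, order_start = 0, and
 * order_end = 2: the middle frames [6, 8) are enqueued first, then
 * the order-0 block at frame 5 and the order-2 block at frames 8-11
 * are freed, for 1 + 2 + 4 = 7 pages in total (assuming
 * VM_NFREEORDER - 1 >= 3).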
1440b8590daeSDoug Moore */ 1441b8590daeSDoug Moore if (m_start < m_end) 14420078df5fSDoug Moore vm_phys_enqueue_contig(m_start, pool, m_end - m_start); 1443e3537f92SDoug Moore if (order_start < max_order) 14440078df5fSDoug Moore vm_phys_free_pages(m, pool, order_start); 1445e3537f92SDoug Moore if (order_end < max_order) 14460078df5fSDoug Moore vm_phys_free_pages(m_end, pool, order_end); 14475c1f2cc4SAlan Cox } 14485c1f2cc4SAlan Cox 14495c1f2cc4SAlan Cox /* 14509e817428SDoug Moore * Identify the first address range within segment segind or greater 14519e817428SDoug Moore * that matches the domain, lies within the low/high range, and has 14529e817428SDoug Moore * enough pages. Return -1 if there is none. 1453c869e672SAlan Cox */ 14549e817428SDoug Moore int 14559e817428SDoug Moore vm_phys_find_range(vm_page_t bounds[], int segind, int domain, 14569e817428SDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high) 1457c869e672SAlan Cox { 14589e817428SDoug Moore vm_paddr_t pa_end, pa_start; 14599e817428SDoug Moore struct vm_phys_seg *end_seg, *seg; 1460c869e672SAlan Cox 14619e817428SDoug Moore KASSERT(npages > 0, ("npages is zero")); 146258d42717SAlan Cox KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range")); 14639e817428SDoug Moore end_seg = &vm_phys_segs[vm_phys_nsegs]; 14649e817428SDoug Moore for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) { 14653f289c3fSJeff Roberson if (seg->domain != domain) 14663f289c3fSJeff Roberson continue; 1467c869e672SAlan Cox if (seg->start >= high) 14689e817428SDoug Moore return (-1); 14699e817428SDoug Moore pa_start = MAX(low, seg->start); 14709e817428SDoug Moore pa_end = MIN(high, seg->end); 14719e817428SDoug Moore if (pa_end - pa_start < ptoa(npages)) 1472c869e672SAlan Cox continue; 1473b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT 1474b16b4c22SMark Johnston /* 1475b16b4c22SMark Johnston * The pages on the free lists must be initialized. 1476b16b4c22SMark Johnston */ 1477b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false); 1478b16b4c22SMark Johnston #endif 147969cbb187SMark Johnston bounds[0] = vm_phys_seg_paddr_to_vm_page(seg, pa_start); 1480fbff6d54SMark Johnston bounds[1] = &seg->first_page[atop(pa_end - seg->start)]; 14819e817428SDoug Moore return (seg - vm_phys_segs); 1482c869e672SAlan Cox } 14839e817428SDoug Moore return (-1); 1484c869e672SAlan Cox } 1485c869e672SAlan Cox 1486c869e672SAlan Cox /* 14879742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 14886062d9faSMark Johnston * succeeds, remove "m" from the free lists and return true. Otherwise, return 14896062d9faSMark Johnston * false, indicating that "m" is not in the free lists. 14907bfda801SAlan Cox * 14917bfda801SAlan Cox * The free page queues must be locked. 14927bfda801SAlan Cox */ 14936062d9faSMark Johnston bool 1494b16b4c22SMark Johnston vm_phys_unfree_page(vm_paddr_t pa) 14957bfda801SAlan Cox { 14967bfda801SAlan Cox struct vm_freelist *fl; 14977bfda801SAlan Cox struct vm_phys_seg *seg; 1498b16b4c22SMark Johnston vm_paddr_t pa_half; 1499b16b4c22SMark Johnston vm_page_t m, m_set, m_tmp; 15000078df5fSDoug Moore int order, pool; 15017bfda801SAlan Cox 1502b16b4c22SMark Johnston seg = vm_phys_paddr_to_seg(pa); 1503b16b4c22SMark Johnston vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1504b16b4c22SMark Johnston 15050078df5fSDoug Moore #ifdef VM_FREEPOOL_LAZYINIT 1506b16b4c22SMark Johnston /* 1507b16b4c22SMark Johnston * The pages on the free lists must be initialized. 
1508b16b4c22SMark Johnston */ 1509b16b4c22SMark Johnston vm_phys_lazy_init_domain(seg->domain, true); 1510b16b4c22SMark Johnston #endif 1511b16b4c22SMark Johnston 15127bfda801SAlan Cox /* 15137bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 15147bfda801SAlan Cox * physical pages containing the given physical page "m" and 15157bfda801SAlan Cox * assign it to "m_set". 15167bfda801SAlan Cox */ 1517b16b4c22SMark Johnston m = vm_phys_paddr_to_vm_page(pa); 15187bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1519bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 15207bfda801SAlan Cox order++; 15217bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 15222fbced65SAlan Cox if (pa >= seg->start) 152369cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa); 1524e35395ceSAlan Cox else 15256062d9faSMark Johnston return (false); 15267bfda801SAlan Cox } 1527e35395ceSAlan Cox if (m_set->order < order) 15286062d9faSMark Johnston return (false); 1529e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 15306062d9faSMark Johnston return (false); 15317bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 15327bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 15337bfda801SAlan Cox m_set, m_set->order)); 15347bfda801SAlan Cox 15357bfda801SAlan Cox /* 15367bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 15377bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 15387bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 15397bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 15407bfda801SAlan Cox */ 15410078df5fSDoug Moore pool = m_set->pool; 15420078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 15437bfda801SAlan Cox order = m_set->order; 15447e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 15457bfda801SAlan Cox while (order > 0) { 15467bfda801SAlan Cox order--; 15477bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 15487bfda801SAlan Cox if (m->phys_addr < pa_half) 154969cbb187SMark Johnston m_tmp = vm_phys_seg_paddr_to_vm_page(seg, pa_half); 15507bfda801SAlan Cox else { 15517bfda801SAlan Cox m_tmp = m_set; 155269cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa_half); 15537bfda801SAlan Cox } 15540078df5fSDoug Moore vm_freelist_add(fl, m_tmp, order, pool, 0); 15557bfda801SAlan Cox } 15567bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 15576062d9faSMark Johnston return (true); 15587bfda801SAlan Cox } 15597bfda801SAlan Cox 15607bfda801SAlan Cox /* 15612a4897bdSDoug Moore * Find a run of contiguous physical pages, meeting alignment requirements, from 15622a4897bdSDoug Moore * a list of max-sized page blocks, where we need at least two consecutive 15632a4897bdSDoug Moore * blocks to satisfy the (large) page request. 
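 *
 * This path is only taken when the request exceeds a single max-order
 * block; for instance, with 4 KB pages and a maximum order of 12
 * (16 MB blocks), a 24 MB allocation must find two physically
 * consecutive free max-order blocks that contain a suitably aligned
 * 24 MB run.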
1564fa8a6585SDoug Moore */ 1565fa8a6585SDoug Moore static vm_page_t 15662a4897bdSDoug Moore vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages, 1567fa8a6585SDoug Moore vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 1568fa8a6585SDoug Moore { 1569fa8a6585SDoug Moore struct vm_phys_seg *seg; 15702a4897bdSDoug Moore vm_page_t m, m_iter, m_ret; 15712a4897bdSDoug Moore vm_paddr_t max_size, size; 15722a4897bdSDoug Moore int max_order; 1573fa8a6585SDoug Moore 15742a4897bdSDoug Moore max_order = VM_NFREEORDER - 1; 1575fa8a6585SDoug Moore size = npages << PAGE_SHIFT; 15762a4897bdSDoug Moore max_size = (vm_paddr_t)1 << (PAGE_SHIFT + max_order); 15772a4897bdSDoug Moore KASSERT(size > max_size, ("size is too small")); 15782a4897bdSDoug Moore 1579fa8a6585SDoug Moore /* 15802a4897bdSDoug Moore * In order to avoid examining any free max-sized page block more than 15812a4897bdSDoug Moore * twice, identify the ones that are first in a physically-contiguous 15822a4897bdSDoug Moore * sequence of such blocks, and only for those walk the sequence to 15832a4897bdSDoug Moore * check if there are enough free blocks starting at a properly aligned 15842a4897bdSDoug Moore * block. Thus, no block is checked for free-ness more than twice. 1585fa8a6585SDoug Moore */ 15862a4897bdSDoug Moore TAILQ_FOREACH(m, &fl[max_order].pl, listq) { 15872a4897bdSDoug Moore /* 15882a4897bdSDoug Moore * Skip m unless it is first in a sequence of free max page 15892a4897bdSDoug Moore * blocks >= low in its segment. 15902a4897bdSDoug Moore */ 15912a4897bdSDoug Moore seg = &vm_phys_segs[m->segind]; 15922a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) < MAX(low, seg->start)) 15932a4897bdSDoug Moore continue; 15942a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) >= max_size && 15952a4897bdSDoug Moore VM_PAGE_TO_PHYS(m) - max_size >= MAX(low, seg->start) && 15962a4897bdSDoug Moore max_order == m[-1 << max_order].order) 1597fa8a6585SDoug Moore continue; 1598fa8a6585SDoug Moore 1599fa8a6585SDoug Moore /* 16002a4897bdSDoug Moore * Advance m_ret from m to the first of the sequence, if any, 16012a4897bdSDoug Moore * that satisfies alignment conditions and might leave enough 16022a4897bdSDoug Moore * space. 1603fa8a6585SDoug Moore */ 16042a4897bdSDoug Moore m_ret = m; 16052a4897bdSDoug Moore while (!vm_addr_ok(VM_PAGE_TO_PHYS(m_ret), 16062a4897bdSDoug Moore size, alignment, boundary) && 16072a4897bdSDoug Moore VM_PAGE_TO_PHYS(m_ret) + size <= MIN(high, seg->end) && 16082a4897bdSDoug Moore max_order == m_ret[1 << max_order].order) 16092a4897bdSDoug Moore m_ret += 1 << max_order; 16102a4897bdSDoug Moore 16112a4897bdSDoug Moore /* 16122a4897bdSDoug Moore * Skip m unless some block m_ret in the sequence is properly 16132a4897bdSDoug Moore * aligned, and begins a sequence of enough pages less than 16142a4897bdSDoug Moore * high, and in the same segment. 16152a4897bdSDoug Moore */ 16162a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m_ret) + size > MIN(high, seg->end)) 1617fa8a6585SDoug Moore continue; 1618fa8a6585SDoug Moore 1619fa8a6585SDoug Moore /* 16202a4897bdSDoug Moore * Skip m unless the blocks to allocate starting at m_ret are 16212a4897bdSDoug Moore * all free. 
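 *
 * The loop below has an intentionally empty body: it merely advances
 * m_iter across the candidate blocks and stops at the first one that
 * is not a free max-order block, or once "npages" worth of blocks
 * have been verified.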
1622fa8a6585SDoug Moore */ 16232a4897bdSDoug Moore for (m_iter = m_ret; 16242a4897bdSDoug Moore m_iter < m_ret + npages && max_order == m_iter->order; 16252a4897bdSDoug Moore m_iter += 1 << max_order) { 1626fa8a6585SDoug Moore } 16272a4897bdSDoug Moore if (m_iter < m_ret + npages) 1628fa8a6585SDoug Moore continue; 1629fa8a6585SDoug Moore return (m_ret); 1630fa8a6585SDoug Moore } 1631fa8a6585SDoug Moore return (NULL); 1632fa8a6585SDoug Moore } 1633fa8a6585SDoug Moore 1634fa8a6585SDoug Moore /* 1635fa8a6585SDoug Moore * Find a run of contiguous physical pages from the specified free list 1636342056faSDoug Moore * table. 1637c869e672SAlan Cox */ 1638c869e672SAlan Cox static vm_page_t 1639fa8a6585SDoug Moore vm_phys_find_queues_contig( 1640342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX], 1641342056faSDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high, 1642342056faSDoug Moore u_long alignment, vm_paddr_t boundary) 1643c869e672SAlan Cox { 1644c869e672SAlan Cox struct vm_freelist *fl; 1645fa8a6585SDoug Moore vm_page_t m_ret; 1646c869e672SAlan Cox vm_paddr_t pa, pa_end, size; 1647c869e672SAlan Cox int oind, order, pind; 1648c869e672SAlan Cox 1649c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1650c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1651c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1652c869e672SAlan Cox /* Compute the queue that is the best fit for npages. */ 16539161b4deSAlan Cox order = flsl(npages - 1); 1654fa8a6585SDoug Moore /* Search for a large enough free block. */ 1655c869e672SAlan Cox size = npages << PAGE_SHIFT; 1656fa8a6585SDoug Moore for (oind = order; oind < VM_NFREEORDER; oind++) { 1657b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) { 1658342056faSDoug Moore fl = (*queues)[pind]; 16595cd29d0fSMark Johnston TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { 1660c869e672SAlan Cox /* 1661da92ecbcSDoug Moore * Determine if the address range starting at pa 1662da92ecbcSDoug Moore * is within the given range, satisfies the 1663da92ecbcSDoug Moore * given alignment, and does not cross the given 1664da92ecbcSDoug Moore * boundary. 166511752d88SAlan Cox */ 1666da92ecbcSDoug Moore pa = VM_PAGE_TO_PHYS(m_ret); 1667da92ecbcSDoug Moore pa_end = pa + size; 1668fa8a6585SDoug Moore if (low <= pa && pa_end <= high && 1669fa8a6585SDoug Moore vm_addr_ok(pa, size, alignment, boundary)) 1670fa8a6585SDoug Moore return (m_ret); 1671fa8a6585SDoug Moore } 1672fa8a6585SDoug Moore } 1673fa8a6585SDoug Moore } 1674da92ecbcSDoug Moore if (order < VM_NFREEORDER) 1675fa8a6585SDoug Moore return (NULL); 16762a4897bdSDoug Moore /* Search for a long-enough sequence of max-order blocks. */ 1677b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) { 1678fa8a6585SDoug Moore fl = (*queues)[pind]; 16792a4897bdSDoug Moore m_ret = vm_phys_find_freelist_contig(fl, npages, 1680fa8a6585SDoug Moore low, high, alignment, boundary); 1681fa8a6585SDoug Moore if (m_ret != NULL) 1682fa8a6585SDoug Moore return (m_ret); 168311752d88SAlan Cox } 168411752d88SAlan Cox return (NULL); 168511752d88SAlan Cox } 168611752d88SAlan Cox 1687b7565d44SJeff Roberson /* 1688342056faSDoug Moore * Allocate a contiguous set of physical pages of the given size 1689342056faSDoug Moore * "npages" from the free lists. 
All of the physical pages must be at 1690342056faSDoug Moore * or above the given physical address "low" and below the given 1691342056faSDoug Moore * physical address "high". The given value "alignment" determines the 1692342056faSDoug Moore * alignment of the first physical page in the set. If the given value 1693342056faSDoug Moore * "boundary" is non-zero, then the set of physical pages cannot cross 1694342056faSDoug Moore * any physical address boundary that is a multiple of that value. Both 16950078df5fSDoug Moore * "alignment" and "boundary" must be a power of two. Sets the pool 16960078df5fSDoug Moore * field to DEFAULT in the first allocated page. 1697342056faSDoug Moore */ 1698342056faSDoug Moore vm_page_t 1699342056faSDoug Moore vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 1700342056faSDoug Moore u_long alignment, vm_paddr_t boundary) 1701342056faSDoug Moore { 1702342056faSDoug Moore vm_paddr_t pa_end, pa_start; 1703fa8a6585SDoug Moore struct vm_freelist *fl; 1704fa8a6585SDoug Moore vm_page_t m, m_run; 1705342056faSDoug Moore struct vm_phys_seg *seg; 1706342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX]; 1707fa8a6585SDoug Moore int oind, segind; 1708342056faSDoug Moore 1709342056faSDoug Moore KASSERT(npages > 0, ("npages is 0")); 1710342056faSDoug Moore KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1711342056faSDoug Moore KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1712342056faSDoug Moore vm_domain_free_assert_locked(VM_DOMAIN(domain)); 1713342056faSDoug Moore if (low >= high) 1714342056faSDoug Moore return (NULL); 1715342056faSDoug Moore queues = NULL; 1716342056faSDoug Moore m_run = NULL; 1717342056faSDoug Moore for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 1718342056faSDoug Moore seg = &vm_phys_segs[segind]; 1719342056faSDoug Moore if (seg->start >= high || seg->domain != domain) 1720342056faSDoug Moore continue; 1721342056faSDoug Moore if (low >= seg->end) 1722342056faSDoug Moore break; 1723342056faSDoug Moore if (low <= seg->start) 1724342056faSDoug Moore pa_start = seg->start; 1725342056faSDoug Moore else 1726342056faSDoug Moore pa_start = low; 1727342056faSDoug Moore if (high < seg->end) 1728342056faSDoug Moore pa_end = high; 1729342056faSDoug Moore else 1730342056faSDoug Moore pa_end = seg->end; 1731342056faSDoug Moore if (pa_end - pa_start < ptoa(npages)) 1732342056faSDoug Moore continue; 1733342056faSDoug Moore /* 1734342056faSDoug Moore * If a previous segment led to a search using 1735342056faSDoug Moore * the same free lists as would this segment, then 1736342056faSDoug Moore * we've actually already searched within this 1737342056faSDoug Moore * too. So skip it. 1738342056faSDoug Moore */ 1739342056faSDoug Moore if (seg->free_queues == queues) 1740342056faSDoug Moore continue; 1741342056faSDoug Moore queues = seg->free_queues; 1742fa8a6585SDoug Moore m_run = vm_phys_find_queues_contig(queues, npages, 1743342056faSDoug Moore low, high, alignment, boundary); 1744342056faSDoug Moore if (m_run != NULL) 1745342056faSDoug Moore break; 1746342056faSDoug Moore } 1747fa8a6585SDoug Moore if (m_run == NULL) 1748fa8a6585SDoug Moore return (NULL); 1749fa8a6585SDoug Moore 1750fa8a6585SDoug Moore /* Allocate pages from the page-range found. 
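 * Whole power-of-two blocks are removed from the free lists, so this
 * loop may overshoot "npages"; the surplus tail is returned to the
 * default pool immediately below.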
*/ 1751fa8a6585SDoug Moore for (m = m_run; m < &m_run[npages]; m = &m[1 << oind]) { 1752fa8a6585SDoug Moore fl = (*queues)[m->pool]; 1753fa8a6585SDoug Moore oind = m->order; 1754fa8a6585SDoug Moore vm_freelist_rem(fl, m, oind); 17550078df5fSDoug Moore vm_phys_finish_init(m, oind); 1756fa8a6585SDoug Moore } 1757fa8a6585SDoug Moore /* Return excess pages to the free lists. */ 1758fa8a6585SDoug Moore fl = (*queues)[VM_FREEPOOL_DEFAULT]; 17590078df5fSDoug Moore vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 17600078df5fSDoug Moore VM_FREEPOOL_DEFAULT, 0); 17612a4897bdSDoug Moore 17622a4897bdSDoug Moore /* Return page verified to satisfy conditions of request. */ 17632a4897bdSDoug Moore pa_start = VM_PAGE_TO_PHYS(m_run); 17642a4897bdSDoug Moore KASSERT(low <= pa_start, 17652a4897bdSDoug Moore ("memory allocated below minimum requested range")); 17662a4897bdSDoug Moore KASSERT(pa_start + ptoa(npages) <= high, 17672a4897bdSDoug Moore ("memory allocated above maximum requested range")); 17682a4897bdSDoug Moore seg = &vm_phys_segs[m_run->segind]; 17692a4897bdSDoug Moore KASSERT(seg->domain == domain, 17702a4897bdSDoug Moore ("memory not allocated from specified domain")); 17712a4897bdSDoug Moore KASSERT(vm_addr_ok(pa_start, ptoa(npages), alignment, boundary), 17722a4897bdSDoug Moore ("memory alignment/boundary constraints not satisfied")); 1773342056faSDoug Moore return (m_run); 1774342056faSDoug Moore } 1775342056faSDoug Moore 1776342056faSDoug Moore /* 1777b7565d44SJeff Roberson * Return the index of the first unused slot which may be the terminating 1778b7565d44SJeff Roberson * entry. 1779b7565d44SJeff Roberson */ 1780b7565d44SJeff Roberson static int 1781b7565d44SJeff Roberson vm_phys_avail_count(void) 1782b7565d44SJeff Roberson { 1783b7565d44SJeff Roberson int i; 1784b7565d44SJeff Roberson 1785291b7bf0SOlivier Certner for (i = 0; i < PHYS_AVAIL_COUNT; i += 2) 1786291b7bf0SOlivier Certner if (phys_avail[i] == 0 && phys_avail[i + 1] == 0) 1787b7565d44SJeff Roberson return (i); 1788291b7bf0SOlivier Certner panic("Improperly terminated phys_avail[]"); 1789b7565d44SJeff Roberson } 1790b7565d44SJeff Roberson 1791b7565d44SJeff Roberson /* 1792b7565d44SJeff Roberson * Assert that a phys_avail entry is valid. 1793b7565d44SJeff Roberson */ 1794b7565d44SJeff Roberson static void 1795b7565d44SJeff Roberson vm_phys_avail_check(int i) 1796b7565d44SJeff Roberson { 1797125ef4e0SOlivier Certner if (i % 2 != 0) 1798125ef4e0SOlivier Certner panic("Chunk start index %d is not even.", i); 1799b7565d44SJeff Roberson if (phys_avail[i] & PAGE_MASK) 1800b7565d44SJeff Roberson panic("Unaligned phys_avail[%d]: %#jx", i, 1801b7565d44SJeff Roberson (intmax_t)phys_avail[i]); 1802b7565d44SJeff Roberson if (phys_avail[i + 1] & PAGE_MASK) 1803b7565d44SJeff Roberson panic("Unaligned phys_avail[%d + 1]: %#jx", i, 1804125ef4e0SOlivier Certner (intmax_t)phys_avail[i + 1]); 1805b7565d44SJeff Roberson if (phys_avail[i + 1] < phys_avail[i]) 1806125ef4e0SOlivier Certner panic("phys_avail[%d]: start %#jx > end %#jx", i, 1807b7565d44SJeff Roberson (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]); 1808b7565d44SJeff Roberson } 1809b7565d44SJeff Roberson 1810b7565d44SJeff Roberson /* 1811b7565d44SJeff Roberson * Return the index of an overlapping phys_avail entry or -1. 
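 *
 * For example, with a hypothetical map
 * phys_avail[] = { 0x1000, 0x9f000, 0x100000, 0x8000000, 0, 0 },
 * vm_phys_avail_find(0x200000) returns 2 and
 * vm_phys_avail_find(0xa0000) returns -1.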
1812b7565d44SJeff Roberson */ 1813be3f5f29SJeff Roberson #ifdef NUMA 1814b7565d44SJeff Roberson static int 1815b7565d44SJeff Roberson vm_phys_avail_find(vm_paddr_t pa) 1816b7565d44SJeff Roberson { 1817b7565d44SJeff Roberson int i; 1818b7565d44SJeff Roberson 1819b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) 1820b7565d44SJeff Roberson if (phys_avail[i] <= pa && phys_avail[i + 1] > pa) 1821b7565d44SJeff Roberson return (i); 1822b7565d44SJeff Roberson return (-1); 1823b7565d44SJeff Roberson } 1824be3f5f29SJeff Roberson #endif 1825b7565d44SJeff Roberson 1826b7565d44SJeff Roberson /* 1827b7565d44SJeff Roberson * Return the index of the largest entry. 1828b7565d44SJeff Roberson */ 1829b7565d44SJeff Roberson int 1830b7565d44SJeff Roberson vm_phys_avail_largest(void) 1831b7565d44SJeff Roberson { 1832b7565d44SJeff Roberson vm_paddr_t sz, largesz; 1833b7565d44SJeff Roberson int largest; 1834b7565d44SJeff Roberson int i; 1835b7565d44SJeff Roberson 1836b7565d44SJeff Roberson largest = 0; 1837b7565d44SJeff Roberson largesz = 0; 1838b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) { 1839b7565d44SJeff Roberson sz = vm_phys_avail_size(i); 1840b7565d44SJeff Roberson if (sz > largesz) { 1841b7565d44SJeff Roberson largesz = sz; 1842b7565d44SJeff Roberson largest = i; 1843b7565d44SJeff Roberson } 1844b7565d44SJeff Roberson } 1845b7565d44SJeff Roberson 1846b7565d44SJeff Roberson return (largest); 1847b7565d44SJeff Roberson } 1848b7565d44SJeff Roberson 1849b7565d44SJeff Roberson vm_paddr_t 1850b7565d44SJeff Roberson vm_phys_avail_size(int i) 1851b7565d44SJeff Roberson { 1852b7565d44SJeff Roberson 1853b7565d44SJeff Roberson return (phys_avail[i + 1] - phys_avail[i]); 1854b7565d44SJeff Roberson } 1855b7565d44SJeff Roberson 1856b7565d44SJeff Roberson /* 1857*e1499bffSOlivier Certner * Split a chunk in phys_avail[] at the address 'pa'. 1858*e1499bffSOlivier Certner * 1859*e1499bffSOlivier Certner * 'pa' must be within a chunk (slots i and i + 1) or one of its boundaries. 1860*e1499bffSOlivier Certner * Returns zero on actual split, in which case the two new chunks occupy slots 1861*e1499bffSOlivier Certner * i to i + 3, else EJUSTRETURN if 'pa' was one of the boundaries (and no split 1862*e1499bffSOlivier Certner * actually occurred) else ENOSPC if there are not enough slots in phys_avail[] 1863*e1499bffSOlivier Certner * to represent the additional chunk caused by the split. 
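 *
 * For example, splitting a chunk { 0x100000, 0x800000 } held in slots
 * i and i + 1 at pa = 0x400000 shifts the subsequent entries up by two
 * slots and leaves { 0x100000, 0x400000 } in slots i and i + 1 and
 * { 0x400000, 0x800000 } in slots i + 2 and i + 3.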
1864b7565d44SJeff Roberson */ 1865b7565d44SJeff Roberson static int 1866b7565d44SJeff Roberson vm_phys_avail_split(vm_paddr_t pa, int i) 1867b7565d44SJeff Roberson { 1868b7565d44SJeff Roberson int cnt; 1869b7565d44SJeff Roberson 1870b7565d44SJeff Roberson vm_phys_avail_check(i); 1871*e1499bffSOlivier Certner if (pa < phys_avail[i] || pa > phys_avail[i + 1]) 1872*e1499bffSOlivier Certner panic("%s: Address %#jx not in range at slot %d [%#jx;%#jx].", 1873*e1499bffSOlivier Certner __func__, (uintmax_t)pa, i, 1874*e1499bffSOlivier Certner (uintmax_t)phys_avail[i], (uintmax_t)phys_avail[i + 1]); 1875*e1499bffSOlivier Certner if (pa == phys_avail[i] || pa == phys_avail[i + 1]) 1876*e1499bffSOlivier Certner return (EJUSTRETURN); 1877b7565d44SJeff Roberson cnt = vm_phys_avail_count(); 1878b7565d44SJeff Roberson if (cnt >= PHYS_AVAIL_ENTRIES) 1879b7565d44SJeff Roberson return (ENOSPC); 1880b7565d44SJeff Roberson memmove(&phys_avail[i + 2], &phys_avail[i], 1881b7565d44SJeff Roberson (cnt - i) * sizeof(phys_avail[0])); 1882b7565d44SJeff Roberson phys_avail[i + 1] = pa; 1883b7565d44SJeff Roberson phys_avail[i + 2] = pa; 1884b7565d44SJeff Roberson vm_phys_avail_check(i); 1885b7565d44SJeff Roberson vm_phys_avail_check(i+2); 1886b7565d44SJeff Roberson 1887b7565d44SJeff Roberson return (0); 1888b7565d44SJeff Roberson } 1889b7565d44SJeff Roberson 189031991a5aSMitchell Horne /* 189131991a5aSMitchell Horne * Check if a given physical address can be included as part of a crash dump. 189231991a5aSMitchell Horne */ 189331991a5aSMitchell Horne bool 189431991a5aSMitchell Horne vm_phys_is_dumpable(vm_paddr_t pa) 189531991a5aSMitchell Horne { 189631991a5aSMitchell Horne vm_page_t m; 189731991a5aSMitchell Horne int i; 189831991a5aSMitchell Horne 189931991a5aSMitchell Horne if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 190031991a5aSMitchell Horne return ((m->flags & PG_NODUMP) == 0); 190131991a5aSMitchell Horne 190231991a5aSMitchell Horne for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 190331991a5aSMitchell Horne if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 190431991a5aSMitchell Horne return (true); 190531991a5aSMitchell Horne } 190631991a5aSMitchell Horne return (false); 190731991a5aSMitchell Horne } 190831991a5aSMitchell Horne 190981302f1dSMark Johnston void 191081302f1dSMark Johnston vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end) 191181302f1dSMark Johnston { 191281302f1dSMark Johnston struct vm_phys_seg *seg; 191381302f1dSMark Johnston 191481302f1dSMark Johnston if (vm_phys_early_nsegs == -1) 191581302f1dSMark Johnston panic("%s: called after initialization", __func__); 191681302f1dSMark Johnston if (vm_phys_early_nsegs == nitems(vm_phys_early_segs)) 191781302f1dSMark Johnston panic("%s: ran out of early segments", __func__); 191881302f1dSMark Johnston 191981302f1dSMark Johnston seg = &vm_phys_early_segs[vm_phys_early_nsegs++]; 192081302f1dSMark Johnston seg->start = start; 192181302f1dSMark Johnston seg->end = end; 192281302f1dSMark Johnston } 192381302f1dSMark Johnston 1924b7565d44SJeff Roberson /* 1925b7565d44SJeff Roberson * This routine allocates NUMA node specific memory before the page 1926b7565d44SJeff Roberson * allocator is bootstrapped. 
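 *
 * A sketch of a typical early-boot caller (illustrative only; the
 * direct-map macro exists only on platforms that provide one):
 *
 *	pa = vm_phys_early_alloc(domain, alloc_size);
 *	p = (void *)PHYS_TO_DMAP(pa);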
1927b7565d44SJeff Roberson */ 1928b7565d44SJeff Roberson vm_paddr_t 1929b7565d44SJeff Roberson vm_phys_early_alloc(int domain, size_t alloc_size) 1930b7565d44SJeff Roberson { 19312e7838aeSJohn Baldwin #ifdef NUMA 19322e7838aeSJohn Baldwin int mem_index; 19332e7838aeSJohn Baldwin #endif 19342e7838aeSJohn Baldwin int i, biggestone; 1935b7565d44SJeff Roberson vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align; 1936b7565d44SJeff Roberson 193781302f1dSMark Johnston KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains), 193881302f1dSMark Johnston ("%s: invalid domain index %d", __func__, domain)); 1939b7565d44SJeff Roberson 1940b7565d44SJeff Roberson /* 1941b7565d44SJeff Roberson * Search the mem_affinity array for the biggest address 1942b7565d44SJeff Roberson * range in the desired domain. This is used to constrain 1943b7565d44SJeff Roberson * the phys_avail selection below. 1944b7565d44SJeff Roberson */ 1945b7565d44SJeff Roberson biggestsize = 0; 1946b7565d44SJeff Roberson mem_start = 0; 1947b7565d44SJeff Roberson mem_end = -1; 1948b7565d44SJeff Roberson #ifdef NUMA 19492e7838aeSJohn Baldwin mem_index = 0; 1950b7565d44SJeff Roberson if (mem_affinity != NULL) { 1951b7565d44SJeff Roberson for (i = 0;; i++) { 1952b7565d44SJeff Roberson size = mem_affinity[i].end - mem_affinity[i].start; 1953b7565d44SJeff Roberson if (size == 0) 1954b7565d44SJeff Roberson break; 195581302f1dSMark Johnston if (domain != -1 && mem_affinity[i].domain != domain) 1956b7565d44SJeff Roberson continue; 1957b7565d44SJeff Roberson if (size > biggestsize) { 1958b7565d44SJeff Roberson mem_index = i; 1959b7565d44SJeff Roberson biggestsize = size; 1960b7565d44SJeff Roberson } 1961b7565d44SJeff Roberson } 1962b7565d44SJeff Roberson mem_start = mem_affinity[mem_index].start; 1963b7565d44SJeff Roberson mem_end = mem_affinity[mem_index].end; 1964b7565d44SJeff Roberson } 1965b7565d44SJeff Roberson #endif 1966b7565d44SJeff Roberson 1967b7565d44SJeff Roberson /* 1968b7565d44SJeff Roberson * Now find biggest physical segment in within the desired 1969b7565d44SJeff Roberson * numa domain. 1970b7565d44SJeff Roberson */ 1971b7565d44SJeff Roberson biggestsize = 0; 1972b7565d44SJeff Roberson biggestone = 0; 1973b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1974b7565d44SJeff Roberson /* skip regions that are out of range */ 1975b7565d44SJeff Roberson if (phys_avail[i+1] - alloc_size < mem_start || 1976b7565d44SJeff Roberson phys_avail[i+1] > mem_end) 1977b7565d44SJeff Roberson continue; 1978b7565d44SJeff Roberson size = vm_phys_avail_size(i); 1979b7565d44SJeff Roberson if (size > biggestsize) { 1980b7565d44SJeff Roberson biggestone = i; 1981b7565d44SJeff Roberson biggestsize = size; 1982b7565d44SJeff Roberson } 1983b7565d44SJeff Roberson } 1984b7565d44SJeff Roberson alloc_size = round_page(alloc_size); 1985b7565d44SJeff Roberson 1986b7565d44SJeff Roberson /* 1987b7565d44SJeff Roberson * Grab single pages from the front to reduce fragmentation. 1988b7565d44SJeff Roberson */ 1989b7565d44SJeff Roberson if (alloc_size == PAGE_SIZE) { 1990b7565d44SJeff Roberson pa = phys_avail[biggestone]; 1991b7565d44SJeff Roberson phys_avail[biggestone] += PAGE_SIZE; 1992b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 1993b7565d44SJeff Roberson return (pa); 1994b7565d44SJeff Roberson } 1995b7565d44SJeff Roberson 1996b7565d44SJeff Roberson /* 1997b7565d44SJeff Roberson * Naturally align large allocations. 
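 *
 * For example, if the chunk currently ends at 0x12345000 and
 * alloc_size is 0x4000, then align = 0x12345000 & 0x3fff = 0x1000;
 * splitting off (or, failing that, trimming) that tail leaves the
 * chunk ending at 0x12344000, so the allocation returned below at
 * end - alloc_size = 0x12340000 is alloc_size-aligned.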
1998b7565d44SJeff Roberson */ 1999b7565d44SJeff Roberson align = phys_avail[biggestone + 1] & (alloc_size - 1); 2000b7565d44SJeff Roberson if (alloc_size + align > biggestsize) 2001b7565d44SJeff Roberson panic("cannot find a large enough size\n"); 2002b7565d44SJeff Roberson if (align != 0 && 2003b7565d44SJeff Roberson vm_phys_avail_split(phys_avail[biggestone + 1] - align, 2004b7565d44SJeff Roberson biggestone) != 0) 2005b7565d44SJeff Roberson /* Wasting memory. */ 2006b7565d44SJeff Roberson phys_avail[biggestone + 1] -= align; 2007b7565d44SJeff Roberson 2008b7565d44SJeff Roberson phys_avail[biggestone + 1] -= alloc_size; 2009b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 2010b7565d44SJeff Roberson pa = phys_avail[biggestone + 1]; 2011b7565d44SJeff Roberson return (pa); 2012b7565d44SJeff Roberson } 2013b7565d44SJeff Roberson 2014b7565d44SJeff Roberson void 2015b7565d44SJeff Roberson vm_phys_early_startup(void) 2016b7565d44SJeff Roberson { 201781302f1dSMark Johnston struct vm_phys_seg *seg; 2018b7565d44SJeff Roberson int i; 2019b7565d44SJeff Roberson 2020b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 2021b7565d44SJeff Roberson phys_avail[i] = round_page(phys_avail[i]); 2022b7565d44SJeff Roberson phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 2023b7565d44SJeff Roberson } 2024b7565d44SJeff Roberson 202581302f1dSMark Johnston for (i = 0; i < vm_phys_early_nsegs; i++) { 202681302f1dSMark Johnston seg = &vm_phys_early_segs[i]; 202781302f1dSMark Johnston vm_phys_add_seg(seg->start, seg->end); 202881302f1dSMark Johnston } 202981302f1dSMark Johnston vm_phys_early_nsegs = -1; 203081302f1dSMark Johnston 2031b7565d44SJeff Roberson #ifdef NUMA 2032b7565d44SJeff Roberson /* Force phys_avail to be split by domain. */ 2033b7565d44SJeff Roberson if (mem_affinity != NULL) { 2034b7565d44SJeff Roberson int idx; 2035b7565d44SJeff Roberson 2036b7565d44SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) { 2037b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].start); 2038*e1499bffSOlivier Certner if (idx != -1) 2039b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].start, idx); 2040b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].end); 2041*e1499bffSOlivier Certner if (idx != -1) 2042b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].end, idx); 2043b7565d44SJeff Roberson } 2044b7565d44SJeff Roberson } 2045b7565d44SJeff Roberson #endif 2046b7565d44SJeff Roberson } 2047b7565d44SJeff Roberson 204811752d88SAlan Cox #ifdef DDB 204911752d88SAlan Cox /* 205011752d88SAlan Cox * Show the number of physical pages in each of the free lists. 
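 *
 * Invoked from the kernel debugger, e.g.:
 *
 *	db> show freepages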
205111752d88SAlan Cox */ 2052c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(freepages, db_show_freepages, DB_CMD_MEMSAFE) 205311752d88SAlan Cox { 205411752d88SAlan Cox struct vm_freelist *fl; 20557e226537SAttilio Rao int flind, oind, pind, dom; 205611752d88SAlan Cox 20577e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 20587e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom); 205911752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 206011752d88SAlan Cox db_printf("FREE LIST %d:\n" 206111752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 206211752d88SAlan Cox "\n ", flind); 206311752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 206411752d88SAlan Cox db_printf(" | POOL %d", pind); 206511752d88SAlan Cox db_printf("\n-- "); 206611752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 206711752d88SAlan Cox db_printf("-- -- "); 206811752d88SAlan Cox db_printf("--\n"); 206911752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 207011752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind, 207111752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 207211752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 20737e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 207411752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt); 207511752d88SAlan Cox } 207611752d88SAlan Cox db_printf("\n"); 207711752d88SAlan Cox } 207811752d88SAlan Cox db_printf("\n"); 207911752d88SAlan Cox } 20807e226537SAttilio Rao db_printf("\n"); 20817e226537SAttilio Rao } 208211752d88SAlan Cox } 208311752d88SAlan Cox #endif 2084