111752d88SAlan Cox /*- 2fe267a55SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3fe267a55SPedro F. Giffuni * 411752d88SAlan Cox * Copyright (c) 2002-2006 Rice University 511752d88SAlan Cox * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 611752d88SAlan Cox * All rights reserved. 711752d88SAlan Cox * 811752d88SAlan Cox * This software was developed for the FreeBSD Project by Alan L. Cox, 911752d88SAlan Cox * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 1011752d88SAlan Cox * 1111752d88SAlan Cox * Redistribution and use in source and binary forms, with or without 1211752d88SAlan Cox * modification, are permitted provided that the following conditions 1311752d88SAlan Cox * are met: 1411752d88SAlan Cox * 1. Redistributions of source code must retain the above copyright 1511752d88SAlan Cox * notice, this list of conditions and the following disclaimer. 1611752d88SAlan Cox * 2. Redistributions in binary form must reproduce the above copyright 1711752d88SAlan Cox * notice, this list of conditions and the following disclaimer in the 1811752d88SAlan Cox * documentation and/or other materials provided with the distribution. 1911752d88SAlan Cox * 2011752d88SAlan Cox * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2111752d88SAlan Cox * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2211752d88SAlan Cox * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2311752d88SAlan Cox * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 2411752d88SAlan Cox * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2511752d88SAlan Cox * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 2611752d88SAlan Cox * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 2711752d88SAlan Cox * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 2811752d88SAlan Cox * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2911752d88SAlan Cox * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 3011752d88SAlan Cox * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111752d88SAlan Cox * POSSIBILITY OF SUCH DAMAGE. 3211752d88SAlan Cox */ 3311752d88SAlan Cox 34fbd80bd0SAlan Cox /* 35fbd80bd0SAlan Cox * Physical memory system implementation 36fbd80bd0SAlan Cox * 37fbd80bd0SAlan Cox * Any external functions defined by this module are only to be used by the 38fbd80bd0SAlan Cox * virtual memory system. 39fbd80bd0SAlan Cox */ 40fbd80bd0SAlan Cox 4111752d88SAlan Cox #include <sys/cdefs.h> 4211752d88SAlan Cox __FBSDID("$FreeBSD$"); 4311752d88SAlan Cox 4411752d88SAlan Cox #include "opt_ddb.h" 45174b5f38SJohn Baldwin #include "opt_vm.h" 4611752d88SAlan Cox 4711752d88SAlan Cox #include <sys/param.h> 4811752d88SAlan Cox #include <sys/systm.h> 4911752d88SAlan Cox #include <sys/lock.h> 5011752d88SAlan Cox #include <sys/kernel.h> 5111752d88SAlan Cox #include <sys/malloc.h> 5211752d88SAlan Cox #include <sys/mutex.h> 537e226537SAttilio Rao #include <sys/proc.h> 5411752d88SAlan Cox #include <sys/queue.h> 5538d6b2dcSRoger Pau Monné #include <sys/rwlock.h> 5611752d88SAlan Cox #include <sys/sbuf.h> 5711752d88SAlan Cox #include <sys/sysctl.h> 5838d6b2dcSRoger Pau Monné #include <sys/tree.h> 5911752d88SAlan Cox #include <sys/vmmeter.h> 606520495aSAdrian Chadd #include <sys/seq.h> 6111752d88SAlan Cox 6211752d88SAlan Cox #include <ddb/ddb.h> 6311752d88SAlan Cox 6411752d88SAlan Cox #include <vm/vm.h> 6511752d88SAlan Cox 
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/* vm_phys_domain_intersects() encodes domains in a long bitmask. */
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef VM_NUMA_ALLOC
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

/* Registered ranges of fictitious pages, keyed by physical address. */
RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define	VM_ISADMA_BOUNDARY	16777216
#endif
#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef VM_NUMA_ALLOC
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
Cox int order); 15911752d88SAlan Cox 16038d6b2dcSRoger Pau Monné /* 16138d6b2dcSRoger Pau Monné * Red-black tree helpers for vm fictitious range management. 16238d6b2dcSRoger Pau Monné */ 16338d6b2dcSRoger Pau Monné static inline int 16438d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, 16538d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *range) 16638d6b2dcSRoger Pau Monné { 16738d6b2dcSRoger Pau Monné 16838d6b2dcSRoger Pau Monné KASSERT(range->start != 0 && range->end != 0, 16938d6b2dcSRoger Pau Monné ("Invalid range passed on search for vm_fictitious page")); 17038d6b2dcSRoger Pau Monné if (p->start >= range->end) 17138d6b2dcSRoger Pau Monné return (1); 17238d6b2dcSRoger Pau Monné if (p->start < range->start) 17338d6b2dcSRoger Pau Monné return (-1); 17438d6b2dcSRoger Pau Monné 17538d6b2dcSRoger Pau Monné return (0); 17638d6b2dcSRoger Pau Monné } 17738d6b2dcSRoger Pau Monné 17838d6b2dcSRoger Pau Monné static int 17938d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, 18038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *p2) 18138d6b2dcSRoger Pau Monné { 18238d6b2dcSRoger Pau Monné 18338d6b2dcSRoger Pau Monné /* Check if this is a search for a page */ 18438d6b2dcSRoger Pau Monné if (p1->end == 0) 18538d6b2dcSRoger Pau Monné return (vm_phys_fictitious_in_range(p1, p2)); 18638d6b2dcSRoger Pau Monné 18738d6b2dcSRoger Pau Monné KASSERT(p2->end != 0, 18838d6b2dcSRoger Pau Monné ("Invalid range passed as second parameter to vm fictitious comparison")); 18938d6b2dcSRoger Pau Monné 19038d6b2dcSRoger Pau Monné /* Searching to add a new range */ 19138d6b2dcSRoger Pau Monné if (p1->end <= p2->start) 19238d6b2dcSRoger Pau Monné return (-1); 19338d6b2dcSRoger Pau Monné if (p1->start >= p2->end) 19438d6b2dcSRoger Pau Monné return (1); 19538d6b2dcSRoger Pau Monné 19638d6b2dcSRoger Pau Monné panic("Trying to add overlapping vm fictitious ranges:\n" 19738d6b2dcSRoger Pau Monné "[%#jx:%#jx] and 
[%#jx:%#jx]", (uintmax_t)p1->start, 19838d6b2dcSRoger Pau Monné (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); 19938d6b2dcSRoger Pau Monné } 20038d6b2dcSRoger Pau Monné 201449c2e92SKonstantin Belousov boolean_t 202449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) 203449c2e92SKonstantin Belousov { 204449c2e92SKonstantin Belousov struct vm_phys_seg *s; 205449c2e92SKonstantin Belousov int idx; 206449c2e92SKonstantin Belousov 207449c2e92SKonstantin Belousov while ((idx = ffsl(mask)) != 0) { 208449c2e92SKonstantin Belousov idx--; /* ffsl counts from 1 */ 209449c2e92SKonstantin Belousov mask &= ~(1UL << idx); 210449c2e92SKonstantin Belousov s = &vm_phys_segs[idx]; 211449c2e92SKonstantin Belousov if (low < s->end && high > s->start) 212449c2e92SKonstantin Belousov return (TRUE); 213449c2e92SKonstantin Belousov } 214449c2e92SKonstantin Belousov return (FALSE); 215449c2e92SKonstantin Belousov } 216449c2e92SKonstantin Belousov 21711752d88SAlan Cox /* 21811752d88SAlan Cox * Outputs the state of the physical memory allocator, specifically, 21911752d88SAlan Cox * the amount of physical memory in each free list. 
22011752d88SAlan Cox */ 22111752d88SAlan Cox static int 22211752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 22311752d88SAlan Cox { 22411752d88SAlan Cox struct sbuf sbuf; 22511752d88SAlan Cox struct vm_freelist *fl; 2267e226537SAttilio Rao int dom, error, flind, oind, pind; 22711752d88SAlan Cox 22800f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 22900f0e671SMatthew D Fleming if (error != 0) 23000f0e671SMatthew D Fleming return (error); 2317e226537SAttilio Rao sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); 2327e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 233eb2f42fbSAlan Cox sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); 23411752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 235eb2f42fbSAlan Cox sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 23611752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 23711752d88SAlan Cox "\n ", flind); 23811752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 23911752d88SAlan Cox sbuf_printf(&sbuf, " | POOL %d", pind); 24011752d88SAlan Cox sbuf_printf(&sbuf, "\n-- "); 24111752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 24211752d88SAlan Cox sbuf_printf(&sbuf, "-- -- "); 24311752d88SAlan Cox sbuf_printf(&sbuf, "--\n"); 24411752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 245d689bc00SAlan Cox sbuf_printf(&sbuf, " %2d (%6dK)", oind, 24611752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 24711752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 2487e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 249eb2f42fbSAlan Cox sbuf_printf(&sbuf, " | %6d", 2507e226537SAttilio Rao fl[oind].lcnt); 25111752d88SAlan Cox } 25211752d88SAlan Cox sbuf_printf(&sbuf, "\n"); 25311752d88SAlan Cox } 2547e226537SAttilio Rao } 25511752d88SAlan Cox } 2564e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 25711752d88SAlan Cox sbuf_delete(&sbuf); 25811752d88SAlan Cox return (error); 25911752d88SAlan Cox } 26011752d88SAlan Cox 26111752d88SAlan Cox /* 
26211752d88SAlan Cox * Outputs the set of physical memory segments. 26311752d88SAlan Cox */ 26411752d88SAlan Cox static int 26511752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 26611752d88SAlan Cox { 26711752d88SAlan Cox struct sbuf sbuf; 26811752d88SAlan Cox struct vm_phys_seg *seg; 26911752d88SAlan Cox int error, segind; 27011752d88SAlan Cox 27100f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 27200f0e671SMatthew D Fleming if (error != 0) 27300f0e671SMatthew D Fleming return (error); 2744e657159SMatthew D Fleming sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 27511752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 27611752d88SAlan Cox sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 27711752d88SAlan Cox seg = &vm_phys_segs[segind]; 27811752d88SAlan Cox sbuf_printf(&sbuf, "start: %#jx\n", 27911752d88SAlan Cox (uintmax_t)seg->start); 28011752d88SAlan Cox sbuf_printf(&sbuf, "end: %#jx\n", 28111752d88SAlan Cox (uintmax_t)seg->end); 282a3870a18SJohn Baldwin sbuf_printf(&sbuf, "domain: %d\n", seg->domain); 28311752d88SAlan Cox sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 28411752d88SAlan Cox } 2854e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 28611752d88SAlan Cox sbuf_delete(&sbuf); 28711752d88SAlan Cox return (error); 28811752d88SAlan Cox } 28911752d88SAlan Cox 290415d7ccaSAdrian Chadd /* 291415d7ccaSAdrian Chadd * Return affinity, or -1 if there's no affinity information. 
292415d7ccaSAdrian Chadd */ 2936520495aSAdrian Chadd int 294415d7ccaSAdrian Chadd vm_phys_mem_affinity(int f, int t) 295415d7ccaSAdrian Chadd { 296415d7ccaSAdrian Chadd 29762d70a81SJohn Baldwin #ifdef VM_NUMA_ALLOC 298415d7ccaSAdrian Chadd if (mem_locality == NULL) 299415d7ccaSAdrian Chadd return (-1); 300415d7ccaSAdrian Chadd if (f >= vm_ndomains || t >= vm_ndomains) 301415d7ccaSAdrian Chadd return (-1); 302415d7ccaSAdrian Chadd return (mem_locality[f * vm_ndomains + t]); 3036520495aSAdrian Chadd #else 3046520495aSAdrian Chadd return (-1); 3056520495aSAdrian Chadd #endif 306415d7ccaSAdrian Chadd } 307415d7ccaSAdrian Chadd 30862d70a81SJohn Baldwin #ifdef VM_NUMA_ALLOC 309415d7ccaSAdrian Chadd /* 310415d7ccaSAdrian Chadd * Outputs the VM locality table. 311415d7ccaSAdrian Chadd */ 312415d7ccaSAdrian Chadd static int 313415d7ccaSAdrian Chadd sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS) 314415d7ccaSAdrian Chadd { 315415d7ccaSAdrian Chadd struct sbuf sbuf; 316415d7ccaSAdrian Chadd int error, i, j; 317415d7ccaSAdrian Chadd 318415d7ccaSAdrian Chadd error = sysctl_wire_old_buffer(req, 0); 319415d7ccaSAdrian Chadd if (error != 0) 320415d7ccaSAdrian Chadd return (error); 321415d7ccaSAdrian Chadd sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 322415d7ccaSAdrian Chadd 323415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "\n"); 324415d7ccaSAdrian Chadd 325415d7ccaSAdrian Chadd for (i = 0; i < vm_ndomains; i++) { 326415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "%d: ", i); 327415d7ccaSAdrian Chadd for (j = 0; j < vm_ndomains; j++) { 328415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j)); 329415d7ccaSAdrian Chadd } 330415d7ccaSAdrian Chadd sbuf_printf(&sbuf, "\n"); 331415d7ccaSAdrian Chadd } 332415d7ccaSAdrian Chadd error = sbuf_finish(&sbuf); 333415d7ccaSAdrian Chadd sbuf_delete(&sbuf); 334415d7ccaSAdrian Chadd return (error); 335415d7ccaSAdrian Chadd } 3366520495aSAdrian Chadd #endif 337415d7ccaSAdrian Chadd 3387e226537SAttilio Rao static void 3397e226537SAttilio Rao 
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) 340a3870a18SJohn Baldwin { 341a3870a18SJohn Baldwin 3427e226537SAttilio Rao m->order = order; 3437e226537SAttilio Rao if (tail) 344c325e866SKonstantin Belousov TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q); 3457e226537SAttilio Rao else 346c325e866SKonstantin Belousov TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q); 3477e226537SAttilio Rao fl[order].lcnt++; 348a3870a18SJohn Baldwin } 3497e226537SAttilio Rao 3507e226537SAttilio Rao static void 3517e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order) 3527e226537SAttilio Rao { 3537e226537SAttilio Rao 354c325e866SKonstantin Belousov TAILQ_REMOVE(&fl[order].pl, m, plinks.q); 3557e226537SAttilio Rao fl[order].lcnt--; 3567e226537SAttilio Rao m->order = VM_NFREEORDER; 357a3870a18SJohn Baldwin } 358a3870a18SJohn Baldwin 35911752d88SAlan Cox /* 36011752d88SAlan Cox * Create a physical memory segment. 36111752d88SAlan Cox */ 36211752d88SAlan Cox static void 363d866a563SAlan Cox _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain) 36411752d88SAlan Cox { 36511752d88SAlan Cox struct vm_phys_seg *seg; 36611752d88SAlan Cox 36711752d88SAlan Cox KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 36811752d88SAlan Cox ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 369ef435ae7SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 3707e226537SAttilio Rao ("vm_phys_create_seg: invalid domain provided")); 37111752d88SAlan Cox seg = &vm_phys_segs[vm_phys_nsegs++]; 372271f0f12SAlan Cox while (seg > vm_phys_segs && (seg - 1)->start >= end) { 373271f0f12SAlan Cox *seg = *(seg - 1); 374271f0f12SAlan Cox seg--; 375271f0f12SAlan Cox } 37611752d88SAlan Cox seg->start = start; 37711752d88SAlan Cox seg->end = end; 378a3870a18SJohn Baldwin seg->domain = domain; 37911752d88SAlan Cox } 38011752d88SAlan Cox 381a3870a18SJohn Baldwin static void 382d866a563SAlan Cox vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end) 383a3870a18SJohn Baldwin { 
38462d70a81SJohn Baldwin #ifdef VM_NUMA_ALLOC 385a3870a18SJohn Baldwin int i; 386a3870a18SJohn Baldwin 387a3870a18SJohn Baldwin if (mem_affinity == NULL) { 388d866a563SAlan Cox _vm_phys_create_seg(start, end, 0); 389a3870a18SJohn Baldwin return; 390a3870a18SJohn Baldwin } 391a3870a18SJohn Baldwin 392a3870a18SJohn Baldwin for (i = 0;; i++) { 393a3870a18SJohn Baldwin if (mem_affinity[i].end == 0) 394a3870a18SJohn Baldwin panic("Reached end of affinity info"); 395a3870a18SJohn Baldwin if (mem_affinity[i].end <= start) 396a3870a18SJohn Baldwin continue; 397a3870a18SJohn Baldwin if (mem_affinity[i].start > start) 398a3870a18SJohn Baldwin panic("No affinity info for start %jx", 399a3870a18SJohn Baldwin (uintmax_t)start); 400a3870a18SJohn Baldwin if (mem_affinity[i].end >= end) { 401d866a563SAlan Cox _vm_phys_create_seg(start, end, 402a3870a18SJohn Baldwin mem_affinity[i].domain); 403a3870a18SJohn Baldwin break; 404a3870a18SJohn Baldwin } 405d866a563SAlan Cox _vm_phys_create_seg(start, mem_affinity[i].end, 406a3870a18SJohn Baldwin mem_affinity[i].domain); 407a3870a18SJohn Baldwin start = mem_affinity[i].end; 408a3870a18SJohn Baldwin } 40962d70a81SJohn Baldwin #else 41062d70a81SJohn Baldwin _vm_phys_create_seg(start, end, 0); 41162d70a81SJohn Baldwin #endif 412a3870a18SJohn Baldwin } 413a3870a18SJohn Baldwin 41411752d88SAlan Cox /* 415271f0f12SAlan Cox * Add a physical memory segment. 
416271f0f12SAlan Cox */ 417271f0f12SAlan Cox void 418271f0f12SAlan Cox vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) 419271f0f12SAlan Cox { 420d866a563SAlan Cox vm_paddr_t paddr; 421271f0f12SAlan Cox 422271f0f12SAlan Cox KASSERT((start & PAGE_MASK) == 0, 423271f0f12SAlan Cox ("vm_phys_define_seg: start is not page aligned")); 424271f0f12SAlan Cox KASSERT((end & PAGE_MASK) == 0, 425271f0f12SAlan Cox ("vm_phys_define_seg: end is not page aligned")); 426d866a563SAlan Cox 427d866a563SAlan Cox /* 428d866a563SAlan Cox * Split the physical memory segment if it spans two or more free 429d866a563SAlan Cox * list boundaries. 430d866a563SAlan Cox */ 431d866a563SAlan Cox paddr = start; 432271f0f12SAlan Cox #ifdef VM_FREELIST_ISADMA 433d866a563SAlan Cox if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) { 434d866a563SAlan Cox vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY); 435d866a563SAlan Cox paddr = VM_ISADMA_BOUNDARY; 436d866a563SAlan Cox } 437271f0f12SAlan Cox #endif 438d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 439d866a563SAlan Cox if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { 440d866a563SAlan Cox vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); 441d866a563SAlan Cox paddr = VM_LOWMEM_BOUNDARY; 442d866a563SAlan Cox } 443271f0f12SAlan Cox #endif 444d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 445d866a563SAlan Cox if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) { 446d866a563SAlan Cox vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY); 447d866a563SAlan Cox paddr = VM_DMA32_BOUNDARY; 448d866a563SAlan Cox } 449d866a563SAlan Cox #endif 450d866a563SAlan Cox vm_phys_create_seg(paddr, end); 451271f0f12SAlan Cox } 452271f0f12SAlan Cox 453271f0f12SAlan Cox /* 45411752d88SAlan Cox * Initialize the physical memory allocator. 455d866a563SAlan Cox * 456d866a563SAlan Cox * Requires that vm_page_array is initialized! 
45711752d88SAlan Cox */ 45811752d88SAlan Cox void 45911752d88SAlan Cox vm_phys_init(void) 46011752d88SAlan Cox { 46111752d88SAlan Cox struct vm_freelist *fl; 462271f0f12SAlan Cox struct vm_phys_seg *seg; 463d866a563SAlan Cox u_long npages; 464d866a563SAlan Cox int dom, flind, freelist, oind, pind, segind; 46511752d88SAlan Cox 466d866a563SAlan Cox /* 467d866a563SAlan Cox * Compute the number of free lists, and generate the mapping from the 468d866a563SAlan Cox * manifest constants VM_FREELIST_* to the free list indices. 469d866a563SAlan Cox * 470d866a563SAlan Cox * Initially, the entries of vm_freelist_to_flind[] are set to either 471d866a563SAlan Cox * 0 or 1 to indicate which free lists should be created. 472d866a563SAlan Cox */ 473d866a563SAlan Cox npages = 0; 474d866a563SAlan Cox for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 475d866a563SAlan Cox seg = &vm_phys_segs[segind]; 476d866a563SAlan Cox #ifdef VM_FREELIST_ISADMA 477d866a563SAlan Cox if (seg->end <= VM_ISADMA_BOUNDARY) 478d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1; 479d866a563SAlan Cox else 480d866a563SAlan Cox #endif 481d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 482d866a563SAlan Cox if (seg->end <= VM_LOWMEM_BOUNDARY) 483d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; 484d866a563SAlan Cox else 485d866a563SAlan Cox #endif 486d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 487d866a563SAlan Cox if ( 488d866a563SAlan Cox #ifdef VM_DMA32_NPAGES_THRESHOLD 489d866a563SAlan Cox /* 490d866a563SAlan Cox * Create the DMA32 free list only if the amount of 491d866a563SAlan Cox * physical memory above physical address 4G exceeds the 492d866a563SAlan Cox * given threshold. 
493d866a563SAlan Cox */ 494d866a563SAlan Cox npages > VM_DMA32_NPAGES_THRESHOLD && 495d866a563SAlan Cox #endif 496d866a563SAlan Cox seg->end <= VM_DMA32_BOUNDARY) 497d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_DMA32] = 1; 498d866a563SAlan Cox else 499d866a563SAlan Cox #endif 500d866a563SAlan Cox { 501d866a563SAlan Cox npages += atop(seg->end - seg->start); 502d866a563SAlan Cox vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1; 503d866a563SAlan Cox } 504d866a563SAlan Cox } 505d866a563SAlan Cox /* Change each entry into a running total of the free lists. */ 506d866a563SAlan Cox for (freelist = 1; freelist < VM_NFREELIST; freelist++) { 507d866a563SAlan Cox vm_freelist_to_flind[freelist] += 508d866a563SAlan Cox vm_freelist_to_flind[freelist - 1]; 509d866a563SAlan Cox } 510d866a563SAlan Cox vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1]; 511d866a563SAlan Cox KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists")); 512d866a563SAlan Cox /* Change each entry into a free list index. */ 513d866a563SAlan Cox for (freelist = 0; freelist < VM_NFREELIST; freelist++) 514d866a563SAlan Cox vm_freelist_to_flind[freelist]--; 515d866a563SAlan Cox 516d866a563SAlan Cox /* 517d866a563SAlan Cox * Initialize the first_page and free_queues fields of each physical 518d866a563SAlan Cox * memory segment. 
519d866a563SAlan Cox */ 520271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE 521d866a563SAlan Cox npages = 0; 52211752d88SAlan Cox #endif 523271f0f12SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 524271f0f12SAlan Cox seg = &vm_phys_segs[segind]; 525271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE 526d866a563SAlan Cox seg->first_page = &vm_page_array[npages]; 527d866a563SAlan Cox npages += atop(seg->end - seg->start); 528271f0f12SAlan Cox #else 529271f0f12SAlan Cox seg->first_page = PHYS_TO_VM_PAGE(seg->start); 53011752d88SAlan Cox #endif 531d866a563SAlan Cox #ifdef VM_FREELIST_ISADMA 532d866a563SAlan Cox if (seg->end <= VM_ISADMA_BOUNDARY) { 533d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_ISADMA]; 534d866a563SAlan Cox KASSERT(flind >= 0, 535d866a563SAlan Cox ("vm_phys_init: ISADMA flind < 0")); 536d866a563SAlan Cox } else 537d866a563SAlan Cox #endif 538d866a563SAlan Cox #ifdef VM_FREELIST_LOWMEM 539d866a563SAlan Cox if (seg->end <= VM_LOWMEM_BOUNDARY) { 540d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; 541d866a563SAlan Cox KASSERT(flind >= 0, 542d866a563SAlan Cox ("vm_phys_init: LOWMEM flind < 0")); 543d866a563SAlan Cox } else 544d866a563SAlan Cox #endif 545d866a563SAlan Cox #ifdef VM_FREELIST_DMA32 546d866a563SAlan Cox if (seg->end <= VM_DMA32_BOUNDARY) { 547d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_DMA32]; 548d866a563SAlan Cox KASSERT(flind >= 0, 549d866a563SAlan Cox ("vm_phys_init: DMA32 flind < 0")); 550d866a563SAlan Cox } else 551d866a563SAlan Cox #endif 552d866a563SAlan Cox { 553d866a563SAlan Cox flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT]; 554d866a563SAlan Cox KASSERT(flind >= 0, 555d866a563SAlan Cox ("vm_phys_init: DEFAULT flind < 0")); 55611752d88SAlan Cox } 557d866a563SAlan Cox seg->free_queues = &vm_phys_free_queues[seg->domain][flind]; 558d866a563SAlan Cox } 559d866a563SAlan Cox 560d866a563SAlan Cox /* 561d866a563SAlan Cox * Initialize the free queues. 
562d866a563SAlan Cox */ 5637e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 56411752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 56511752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 5667e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 56711752d88SAlan Cox for (oind = 0; oind < VM_NFREEORDER; oind++) 56811752d88SAlan Cox TAILQ_INIT(&fl[oind].pl); 56911752d88SAlan Cox } 57011752d88SAlan Cox } 571a3870a18SJohn Baldwin } 572d866a563SAlan Cox 57338d6b2dcSRoger Pau Monné rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); 57411752d88SAlan Cox } 57511752d88SAlan Cox 57611752d88SAlan Cox /* 57711752d88SAlan Cox * Split a contiguous, power of two-sized set of physical pages. 57811752d88SAlan Cox */ 57911752d88SAlan Cox static __inline void 58011752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 58111752d88SAlan Cox { 58211752d88SAlan Cox vm_page_t m_buddy; 58311752d88SAlan Cox 58411752d88SAlan Cox while (oind > order) { 58511752d88SAlan Cox oind--; 58611752d88SAlan Cox m_buddy = &m[1 << oind]; 58711752d88SAlan Cox KASSERT(m_buddy->order == VM_NFREEORDER, 58811752d88SAlan Cox ("vm_phys_split_pages: page %p has unexpected order %d", 58911752d88SAlan Cox m_buddy, m_buddy->order)); 5907e226537SAttilio Rao vm_freelist_add(fl, m_buddy, oind, 0); 59111752d88SAlan Cox } 59211752d88SAlan Cox } 59311752d88SAlan Cox 59411752d88SAlan Cox /* 59511752d88SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages 59611752d88SAlan Cox * from the free lists. 5978941dc44SAlan Cox * 5988941dc44SAlan Cox * The free page queues must be locked. 59911752d88SAlan Cox */ 60011752d88SAlan Cox vm_page_t 601ef435ae7SJeff Roberson vm_phys_alloc_pages(int domain, int pool, int order) 60211752d88SAlan Cox { 60349ca10d4SJayachandran C. vm_page_t m; 6040db2102aSMichael Zhilin int freelist; 60549ca10d4SJayachandran C. 
6060db2102aSMichael Zhilin for (freelist = 0; freelist < VM_NFREELIST; freelist++) { 6070db2102aSMichael Zhilin m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order); 60849ca10d4SJayachandran C. if (m != NULL) 60949ca10d4SJayachandran C. return (m); 61049ca10d4SJayachandran C. } 61149ca10d4SJayachandran C. return (NULL); 61249ca10d4SJayachandran C. } 61349ca10d4SJayachandran C. 61449ca10d4SJayachandran C. /* 615d866a563SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages from the 616d866a563SAlan Cox * specified free list. The free list must be specified using one of the 617d866a563SAlan Cox * manifest constants VM_FREELIST_*. 618d866a563SAlan Cox * 619d866a563SAlan Cox * The free page queues must be locked. 62049ca10d4SJayachandran C. */ 62149ca10d4SJayachandran C. vm_page_t 6220db2102aSMichael Zhilin vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order) 62349ca10d4SJayachandran C. { 624ef435ae7SJeff Roberson struct vm_freelist *alt, *fl; 62511752d88SAlan Cox vm_page_t m; 6260db2102aSMichael Zhilin int oind, pind, flind; 62711752d88SAlan Cox 628ef435ae7SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 629ef435ae7SJeff Roberson ("vm_phys_alloc_freelist_pages: domain %d is out of range", 630ef435ae7SJeff Roberson domain)); 6310db2102aSMichael Zhilin KASSERT(freelist < VM_NFREELIST, 632d866a563SAlan Cox ("vm_phys_alloc_freelist_pages: freelist %d is out of range", 6335be93778SAndrew Turner freelist)); 63411752d88SAlan Cox KASSERT(pool < VM_NFREEPOOL, 63549ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); 63611752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 63749ca10d4SJayachandran C. 
("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 6386520495aSAdrian Chadd 6390db2102aSMichael Zhilin flind = vm_freelist_to_flind[freelist]; 6400db2102aSMichael Zhilin /* Check if freelist is present */ 6410db2102aSMichael Zhilin if (flind < 0) 6420db2102aSMichael Zhilin return (NULL); 6430db2102aSMichael Zhilin 64411752d88SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 6457e226537SAttilio Rao fl = &vm_phys_free_queues[domain][flind][pool][0]; 64611752d88SAlan Cox for (oind = order; oind < VM_NFREEORDER; oind++) { 64711752d88SAlan Cox m = TAILQ_FIRST(&fl[oind].pl); 64811752d88SAlan Cox if (m != NULL) { 6497e226537SAttilio Rao vm_freelist_rem(fl, m, oind); 65011752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 65111752d88SAlan Cox return (m); 65211752d88SAlan Cox } 65311752d88SAlan Cox } 65411752d88SAlan Cox 65511752d88SAlan Cox /* 65611752d88SAlan Cox * The given pool was empty. Find the largest 65711752d88SAlan Cox * contiguous, power-of-two-sized set of pages in any 65811752d88SAlan Cox * pool. Transfer these pages to the given pool, and 65911752d88SAlan Cox * use them to satisfy the allocation. 66011752d88SAlan Cox */ 66111752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 66211752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 6637e226537SAttilio Rao alt = &vm_phys_free_queues[domain][flind][pind][0]; 66411752d88SAlan Cox m = TAILQ_FIRST(&alt[oind].pl); 66511752d88SAlan Cox if (m != NULL) { 6667e226537SAttilio Rao vm_freelist_rem(alt, m, oind); 66711752d88SAlan Cox vm_phys_set_pool(pool, m, oind); 66811752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 66911752d88SAlan Cox return (m); 67011752d88SAlan Cox } 67111752d88SAlan Cox } 67211752d88SAlan Cox } 67311752d88SAlan Cox return (NULL); 67411752d88SAlan Cox } 67511752d88SAlan Cox 67611752d88SAlan Cox /* 67711752d88SAlan Cox * Find the vm_page corresponding to the given physical address. 
67811752d88SAlan Cox */ 67911752d88SAlan Cox vm_page_t 68011752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa) 68111752d88SAlan Cox { 68211752d88SAlan Cox struct vm_phys_seg *seg; 68311752d88SAlan Cox int segind; 68411752d88SAlan Cox 68511752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 68611752d88SAlan Cox seg = &vm_phys_segs[segind]; 68711752d88SAlan Cox if (pa >= seg->start && pa < seg->end) 68811752d88SAlan Cox return (&seg->first_page[atop(pa - seg->start)]); 68911752d88SAlan Cox } 690f06a3a36SAndrew Thompson return (NULL); 69111752d88SAlan Cox } 69211752d88SAlan Cox 693b6de32bdSKonstantin Belousov vm_page_t 694b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa) 695b6de32bdSKonstantin Belousov { 69638d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg tmp, *seg; 697b6de32bdSKonstantin Belousov vm_page_t m; 698b6de32bdSKonstantin Belousov 699b6de32bdSKonstantin Belousov m = NULL; 70038d6b2dcSRoger Pau Monné tmp.start = pa; 70138d6b2dcSRoger Pau Monné tmp.end = 0; 70238d6b2dcSRoger Pau Monné 70338d6b2dcSRoger Pau Monné rw_rlock(&vm_phys_fictitious_reg_lock); 70438d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 70538d6b2dcSRoger Pau Monné rw_runlock(&vm_phys_fictitious_reg_lock); 70638d6b2dcSRoger Pau Monné if (seg == NULL) 70738d6b2dcSRoger Pau Monné return (NULL); 70838d6b2dcSRoger Pau Monné 709b6de32bdSKonstantin Belousov m = &seg->first_page[atop(pa - seg->start)]; 71038d6b2dcSRoger Pau Monné KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); 71138d6b2dcSRoger Pau Monné 712b6de32bdSKonstantin Belousov return (m); 713b6de32bdSKonstantin Belousov } 714b6de32bdSKonstantin Belousov 7155ebe728dSRoger Pau Monné static inline void 7165ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, 7175ebe728dSRoger Pau Monné long page_count, vm_memattr_t memattr) 7185ebe728dSRoger Pau Monné { 7195ebe728dSRoger Pau Monné long i; 7205ebe728dSRoger 
Pau Monné 721f93f7cf1SMark Johnston bzero(range, page_count * sizeof(*range)); 7225ebe728dSRoger Pau Monné for (i = 0; i < page_count; i++) { 7235ebe728dSRoger Pau Monné vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); 7245ebe728dSRoger Pau Monné range[i].oflags &= ~VPO_UNMANAGED; 7255ebe728dSRoger Pau Monné range[i].busy_lock = VPB_UNBUSIED; 7265ebe728dSRoger Pau Monné } 7275ebe728dSRoger Pau Monné } 7285ebe728dSRoger Pau Monné 729b6de32bdSKonstantin Belousov int 730b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, 731b6de32bdSKonstantin Belousov vm_memattr_t memattr) 732b6de32bdSKonstantin Belousov { 733b6de32bdSKonstantin Belousov struct vm_phys_fictitious_seg *seg; 734b6de32bdSKonstantin Belousov vm_page_t fp; 7355ebe728dSRoger Pau Monné long page_count; 736b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 7375ebe728dSRoger Pau Monné long pi, pe; 7385ebe728dSRoger Pau Monné long dpage_count; 739b6de32bdSKonstantin Belousov #endif 740b6de32bdSKonstantin Belousov 7415ebe728dSRoger Pau Monné KASSERT(start < end, 7425ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 7435ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 7445ebe728dSRoger Pau Monné 745b6de32bdSKonstantin Belousov page_count = (end - start) / PAGE_SIZE; 746b6de32bdSKonstantin Belousov 747b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 748b6de32bdSKonstantin Belousov pi = atop(start); 7495ebe728dSRoger Pau Monné pe = atop(end); 7505ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 751b6de32bdSKonstantin Belousov fp = &vm_page_array[pi - first_page]; 7525ebe728dSRoger Pau Monné if ((pe - first_page) > vm_page_array_size) { 7535ebe728dSRoger Pau Monné /* 7545ebe728dSRoger Pau Monné * We have a segment that starts inside 7555ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 
7565ebe728dSRoger Pau Monné * 7575ebe728dSRoger Pau Monné * Use vm_page_array pages for those that are 7585ebe728dSRoger Pau Monné * inside of the vm_page_array range, and 7595ebe728dSRoger Pau Monné * allocate the remaining ones. 7605ebe728dSRoger Pau Monné */ 7615ebe728dSRoger Pau Monné dpage_count = vm_page_array_size - (pi - first_page); 7625ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, dpage_count, 7635ebe728dSRoger Pau Monné memattr); 7645ebe728dSRoger Pau Monné page_count -= dpage_count; 7655ebe728dSRoger Pau Monné start += ptoa(dpage_count); 7665ebe728dSRoger Pau Monné goto alloc; 7675ebe728dSRoger Pau Monné } 7685ebe728dSRoger Pau Monné /* 7695ebe728dSRoger Pau Monné * We can allocate the full range from vm_page_array, 7705ebe728dSRoger Pau Monné * so there's no need to register the range in the tree. 7715ebe728dSRoger Pau Monné */ 7725ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 7735ebe728dSRoger Pau Monné return (0); 7745ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 7755ebe728dSRoger Pau Monné /* 7765ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 7775ebe728dSRoger Pau Monné * but starts outside of it. 7785ebe728dSRoger Pau Monné */ 7795ebe728dSRoger Pau Monné fp = &vm_page_array[0]; 7805ebe728dSRoger Pau Monné dpage_count = pe - first_page; 7815ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, 7825ebe728dSRoger Pau Monné memattr); 7835ebe728dSRoger Pau Monné end -= ptoa(dpage_count); 7845ebe728dSRoger Pau Monné page_count -= dpage_count; 7855ebe728dSRoger Pau Monné goto alloc; 7865ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 7875ebe728dSRoger Pau Monné /* 7885ebe728dSRoger Pau Monné * Trying to register a fictitious range that expands before 7895ebe728dSRoger Pau Monné * and after vm_page_array. 
7905ebe728dSRoger Pau Monné */ 7915ebe728dSRoger Pau Monné return (EINVAL); 7925ebe728dSRoger Pau Monné } else { 7935ebe728dSRoger Pau Monné alloc: 794b6de32bdSKonstantin Belousov #endif 795b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 796f93f7cf1SMark Johnston M_WAITOK); 7975ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE 798b6de32bdSKonstantin Belousov } 7995ebe728dSRoger Pau Monné #endif 8005ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr); 80138d6b2dcSRoger Pau Monné 80238d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 803b6de32bdSKonstantin Belousov seg->start = start; 804b6de32bdSKonstantin Belousov seg->end = end; 805b6de32bdSKonstantin Belousov seg->first_page = fp; 80638d6b2dcSRoger Pau Monné 80738d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 80838d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 80938d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 81038d6b2dcSRoger Pau Monné 811b6de32bdSKonstantin Belousov return (0); 812b6de32bdSKonstantin Belousov } 813b6de32bdSKonstantin Belousov 814b6de32bdSKonstantin Belousov void 815b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 816b6de32bdSKonstantin Belousov { 81738d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp; 818b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 8195ebe728dSRoger Pau Monné long pi, pe; 820b6de32bdSKonstantin Belousov #endif 821b6de32bdSKonstantin Belousov 8225ebe728dSRoger Pau Monné KASSERT(start < end, 8235ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)", 8245ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end)); 8255ebe728dSRoger Pau Monné 826b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 827b6de32bdSKonstantin Belousov pi = atop(start); 8285ebe728dSRoger Pau Monné pe = atop(end); 8295ebe728dSRoger Pau 
Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) { 8305ebe728dSRoger Pau Monné if ((pe - first_page) <= vm_page_array_size) { 8315ebe728dSRoger Pau Monné /* 8325ebe728dSRoger Pau Monné * This segment was allocated using vm_page_array 8335ebe728dSRoger Pau Monné * only, there's nothing to do since those pages 8345ebe728dSRoger Pau Monné * were never added to the tree. 8355ebe728dSRoger Pau Monné */ 8365ebe728dSRoger Pau Monné return; 8375ebe728dSRoger Pau Monné } 8385ebe728dSRoger Pau Monné /* 8395ebe728dSRoger Pau Monné * We have a segment that starts inside 8405ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it. 8415ebe728dSRoger Pau Monné * 8425ebe728dSRoger Pau Monné * Calculate how many pages were added to the 8435ebe728dSRoger Pau Monné * tree and free them. 8445ebe728dSRoger Pau Monné */ 8455ebe728dSRoger Pau Monné start = ptoa(first_page + vm_page_array_size); 8465ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { 8475ebe728dSRoger Pau Monné /* 8485ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array, 8495ebe728dSRoger Pau Monné * but starts outside of it. 8505ebe728dSRoger Pau Monné */ 8515ebe728dSRoger Pau Monné end = ptoa(first_page); 8525ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { 8535ebe728dSRoger Pau Monné /* Since it's not possible to register such a range, panic. 
*/ 8545ebe728dSRoger Pau Monné panic( 8555ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 8565ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 8575ebe728dSRoger Pau Monné } 858b6de32bdSKonstantin Belousov #endif 85938d6b2dcSRoger Pau Monné tmp.start = start; 86038d6b2dcSRoger Pau Monné tmp.end = 0; 861b6de32bdSKonstantin Belousov 86238d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 86338d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 86438d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 86538d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 86638d6b2dcSRoger Pau Monné panic( 86738d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 86838d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 86938d6b2dcSRoger Pau Monné } 87038d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 87138d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 87238d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 87338d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 874b6de32bdSKonstantin Belousov } 875b6de32bdSKonstantin Belousov 87611752d88SAlan Cox /* 87711752d88SAlan Cox * Free a contiguous, power of two-sized set of physical pages. 8788941dc44SAlan Cox * 8798941dc44SAlan Cox * The free page queues must be locked. 
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		/*
		 * Coalesce with free buddies of increasing order for as
		 * long as possible before placing the block on a queue.
		 */
		pa = VM_PAGE_TO_PHYS(m);
		do {
			/* Flip the bit that selects this order's buddy. */
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			/* Stop unless the buddy is free at the same order. */
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			/* Round down to the start of the merged block. */
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/*
	 * Phase 1: the subset size is limited by the alignment of "m"'s
	 * physical address (a block of order k must be k-aligned).
	 */
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	/*
	 * Phase 2: the subset size is limited only by the remaining page
	 * count, so peel off the highest set bit each iteration.
	 */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	/* Visit each segment in the requested domain, lowest address first. */
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->domain != domain)
			continue;
		/* Segments are ordered; nothing past "high" can match. */
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		/* Clip the scan window to [low, high) within this segment. */
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		/* Skip windows too small to hold "npages" pages. */
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}

/*
Set the pool for a contiguous, power of two-sized set of physical pages. 101611752d88SAlan Cox */ 10177bfda801SAlan Cox void 101811752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order) 101911752d88SAlan Cox { 102011752d88SAlan Cox vm_page_t m_tmp; 102111752d88SAlan Cox 102211752d88SAlan Cox for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 102311752d88SAlan Cox m_tmp->pool = pool; 102411752d88SAlan Cox } 102511752d88SAlan Cox 102611752d88SAlan Cox /* 10279742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 10289742373aSAlan Cox * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 10299742373aSAlan Cox * FALSE, indicating that "m" is not in the free lists. 10307bfda801SAlan Cox * 10317bfda801SAlan Cox * The free page queues must be locked. 10327bfda801SAlan Cox */ 1033e35395ceSAlan Cox boolean_t 10347bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m) 10357bfda801SAlan Cox { 10367bfda801SAlan Cox struct vm_freelist *fl; 10377bfda801SAlan Cox struct vm_phys_seg *seg; 10387bfda801SAlan Cox vm_paddr_t pa, pa_half; 10397bfda801SAlan Cox vm_page_t m_set, m_tmp; 10407bfda801SAlan Cox int order; 10417bfda801SAlan Cox 10427bfda801SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 10437bfda801SAlan Cox 10447bfda801SAlan Cox /* 10457bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 10467bfda801SAlan Cox * physical pages containing the given physical page "m" and 10477bfda801SAlan Cox * assign it to "m_set". 
10487bfda801SAlan Cox */ 10497bfda801SAlan Cox seg = &vm_phys_segs[m->segind]; 10507bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1051bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 10527bfda801SAlan Cox order++; 10537bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 10542fbced65SAlan Cox if (pa >= seg->start) 10557bfda801SAlan Cox m_set = &seg->first_page[atop(pa - seg->start)]; 1056e35395ceSAlan Cox else 1057e35395ceSAlan Cox return (FALSE); 10587bfda801SAlan Cox } 1059e35395ceSAlan Cox if (m_set->order < order) 1060e35395ceSAlan Cox return (FALSE); 1061e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 1062e35395ceSAlan Cox return (FALSE); 10637bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 10647bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 10657bfda801SAlan Cox m_set, m_set->order)); 10667bfda801SAlan Cox 10677bfda801SAlan Cox /* 10687bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 10697bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 10707bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 10717bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 
10727bfda801SAlan Cox */ 10737bfda801SAlan Cox fl = (*seg->free_queues)[m_set->pool]; 10747bfda801SAlan Cox order = m_set->order; 10757e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 10767bfda801SAlan Cox while (order > 0) { 10777bfda801SAlan Cox order--; 10787bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 10797bfda801SAlan Cox if (m->phys_addr < pa_half) 10807bfda801SAlan Cox m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 10817bfda801SAlan Cox else { 10827bfda801SAlan Cox m_tmp = m_set; 10837bfda801SAlan Cox m_set = &seg->first_page[atop(pa_half - seg->start)]; 10847bfda801SAlan Cox } 10857e226537SAttilio Rao vm_freelist_add(fl, m_tmp, order, 0); 10867bfda801SAlan Cox } 10877bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 1088e35395ceSAlan Cox return (TRUE); 10897bfda801SAlan Cox } 10907bfda801SAlan Cox 10917bfda801SAlan Cox /* 10922f9f48d6SAlan Cox * Allocate a contiguous set of physical pages of the given size 10932f9f48d6SAlan Cox * "npages" from the free lists. All of the physical pages must be at 10942f9f48d6SAlan Cox * or above the given physical address "low" and below the given 10952f9f48d6SAlan Cox * physical address "high". The given value "alignment" determines the 10962f9f48d6SAlan Cox * alignment of the first physical page in the set. If the given value 10972f9f48d6SAlan Cox * "boundary" is non-zero, then the set of physical pages cannot cross 10982f9f48d6SAlan Cox * any physical address boundary that is a multiple of that value. Both 109911752d88SAlan Cox * "alignment" and "boundary" must be a power of two. 
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if (low >= high)
		return (NULL);
	m_run = NULL;
	/* Scan the domain's segments from highest address to lowest. */
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		/* Segments are ordered; nothing below "low" remains. */
		if (low >= seg->end)
			break;
		/* Clip the candidate window to [low, high). */
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		/* Skip windows too small to hold "npages" pages. */
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}

/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1),
				    boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	/* Remove every block that the chosen run spans from its queue. */
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	/* One table per domain, one section per free list. */
	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			/* Column header: one column per free pool. */
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			/* One row per order, largest first. */
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif