/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#if MAXMEMDOM > 1
#include <sys/proc.h>
#endif
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <vm/vm_domain.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

struct mem_affinity *mem_affinity;
int *mem_locality;

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define	VM_ISADMA_BOUNDARY	16777216
#endif
#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if MAXMEMDOM > 1
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

/*
 * Default to first-touch + round-robin.
 */
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
    MTX_DEF);
#if MAXMEMDOM > 1
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation. */
static struct vm_domain_policy vm_default_policy =
    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
	char policy_name[32];
	int error;

	mtx_lock(&vm_default_policy_mtx);

	/* Map policy to output string */
	switch (vm_default_policy.p.policy) {
	case VM_POLICY_FIRST_TOUCH:
		strcpy(policy_name, "first-touch");
		break;
	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
		strcpy(policy_name, "first-touch-rr");
		break;
	case VM_POLICY_ROUND_ROBIN:
	default:
		strcpy(policy_name, "rr");
		break;
	}
	mtx_unlock(&vm_default_policy_mtx);

	error = sysctl_handle_string(oidp, &policy_name[0],
	    sizeof(policy_name), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vm_default_policy_mtx);
	/* Set: match on the subset of policies that make sense as a default */
	if (strcmp("first-touch-rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
	} else if (strcmp("first-touch", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_FIRST_TOUCH, 0);
	} else if (strcmp("rr", policy_name) == 0) {
		vm_domain_policy_set(&vm_default_policy,
		    VM_POLICY_ROUND_ROBIN, 0);
	} else {
		error = EINVAL;
		goto finish;
	}

	error = 0;
finish:
	mtx_unlock(&vm_default_policy_mtx);
	return (error);
}

SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");
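/*
 * Usage note (illustrative, not part of the original sources): the default
 * allocation policy can be inspected and changed at runtime, e.g.:
 *
 *	# sysctl vm.default_policy
 *	vm.default_policy: first-touch-rr
 *	# sysctl vm.default_policy=rr
 *
 * Only the strings "rr", "first-touch", and "first-touch-rr" are accepted;
 * any other value makes the handler above return EINVAL.
 */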
/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;

	td->td_dom_rr_idx++;
	td->td_dom_rr_idx %= vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}

/*
 * Initialise a VM domain iterator.
 *
 * Check the thread policy, then the proc policy,
 * then default to the system policy.
 *
 * Later on the various layers will have this logic
 * plumbed into them and the phys code will be explicitly
 * handed a VM domain policy to use.
 */
static void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#if MAXMEMDOM > 1
	struct vm_domain_policy lcl;
#endif

	vm_domain_iterator_init(vi);

#if MAXMEMDOM > 1
	/* Copy out the thread policy */
	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Thread policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}

	vm_domain_policy_localcopy(&lcl,
	    &curthread->td_proc->p_vm_dom_policy);
	if (lcl.p.policy != VM_POLICY_NONE) {
		/* Process policy is present; use it */
		vm_domain_iterator_set_policy(vi, &lcl);
		return;
	}
#endif
	/* Use system default policy */
	vm_domain_iterator_set_policy(vi, &vm_default_policy);
}

static void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

	vm_domain_iterator_cleanup(vi);
}

boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
	struct vm_phys_seg *s;
	int idx;

	while ((idx = ffsl(mask)) != 0) {
		idx--;	/* ffsl counts from 1 */
		mask &= ~(1UL << idx);
		s = &vm_phys_segs[idx];
		if (low < s->end && high > s->start)
			return (TRUE);
	}
	return (FALSE);
}
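/*
 * Example (illustrative): if vm_phys_segs[0] covers [0, 1G) and
 * vm_phys_segs[2] covers [4G, 6G), then
 * vm_phys_domain_intersects(0x5, 5G, 5G + 4096) tests bits 0 and 2 of the
 * mask and returns TRUE, because [5G, 5G + 4096) overlaps segment 2.
 */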
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
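/*
 * Example "sysctl vm.phys_segs" entry (all values are illustrative):
 *
 *	SEGMENT 0:
 *
 *	start:     0x100000
 *	end:       0x9fc000
 *	domain:    0
 *	free list: 0xffffffff80e1a900
 */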
/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#if MAXMEMDOM > 1
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#if MAXMEMDOM > 1
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif
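/*
 * Example (illustrative): on a two-domain system with a typical ACPI SLIT,
 * "sysctl vm.phys_locality" would print something like:
 *
 *	0: 10 21
 *	1: 21 10
 *
 * where row i, column j is mem_locality[i * vm_ndomains + j], i.e. the
 * relative cost of domain i accessing memory that belongs to domain j.
 */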
static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
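/*
 * Example (illustrative; boundary values are configuration-dependent): with
 * VM_LOWMEM_BOUNDARY at 16M and VM_DMA32_BOUNDARY at 4G, a call
 * vm_phys_add_seg(1M, 6G) creates three segments, [1M, 16M), [16M, 4G),
 * and [4G, 6G), so that no single segment spans a free list boundary.
 */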
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
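/*
 * Example (illustrative, assuming VM_NFREELIST == 2 and
 * VM_FREELIST_LOWMEM == 1): if vm_phys_init() finds both the DEFAULT and
 * LOWMEM lists populated, the 0/1 flag array { 1, 1 } becomes { 1, 2 }
 * after the running-total pass, vm_nfreelists is 2, and the final
 * decrement yields { 0, 1 }: VM_FREELIST_DEFAULT maps to flind 0 and
 * VM_FREELIST_LOWMEM to flind 1.
 */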
/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}
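/*
 * Example (illustrative): splitting an order-3 block (8 pages) down to
 * order 0 returns the buddies of order 2, 1, and 0 (4 + 2 + 1 pages) to
 * the free lists and leaves the leading page as the allocation.
 */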
/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	vm_cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int domain, flind;
	struct vm_domain_iterator vi;

	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			m = vm_phys_alloc_domain_pages(domain, flind, pool,
			    order);
			if (m != NULL)
				return (m);
		}
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
{
	vm_page_t m;
	struct vm_domain_iterator vi;
	int domain;

	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	vm_policy_iterator_init(&vi);

	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
		m = vm_phys_alloc_domain_pages(domain,
		    vm_freelist_to_flind[freelist], pool, order);
		if (m != NULL)
			return (m);
	}

	vm_policy_iterator_finish(&vi);
	return (NULL);
}
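/*
 * Usage sketch (illustrative only): allocating a single page from the
 * default pool, with the required lock held:
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * Order n requests a run of 2^n physically contiguous pages, so order 0
 * is one page.
 */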
static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
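/*
 * Example (illustrative): an order-2 request against an empty pool can be
 * satisfied from an order-4 block found in another pool: the block is
 * reassigned with vm_phys_set_pool() and split, the caller receives the
 * leading order-2 run, and the order-3 and order-2 buddies stay on the
 * requested pool's free lists.
 */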
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Since it's not possible to register such a range, panic.
		 */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}
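/*
 * Example (illustrative): the buddy of the block at physical address "pa"
 * and order "order" is found by toggling one address bit,
 * pa ^ ((vm_paddr_t)1 << (PAGE_SHIFT + order)).  Freeing an order-0 page
 * whose buddy is already free therefore merges the pair into an order-1
 * block, and the merge repeats while each successively larger buddy is
 * free, up to order VM_NFREEORDER - 1.
 */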
/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
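/*
 * Example (illustrative): vm_phys_free_contig(m, 13) with "m" starting on
 * an order-3 boundary frees an order-3 block (8 pages) in the first loop,
 * then an order-2 block (4 pages) and an order-0 page in the second loop:
 * the largest aligned power-of-two runs are peeled off first, and the
 * residue is freed in decreasing powers of two.
 */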
116611752d88SAlan Cox /*
116711752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
116811752d88SAlan Cox  */
11697bfda801SAlan Cox void
117011752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
117111752d88SAlan Cox {
117211752d88SAlan Cox 	vm_page_t m_tmp;
117311752d88SAlan Cox 
117411752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
117511752d88SAlan Cox 		m_tmp->pool = pool;
117611752d88SAlan Cox }
117711752d88SAlan Cox 
117811752d88SAlan Cox /*
11799742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
11809742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
11819742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
11827bfda801SAlan Cox  *
11837bfda801SAlan Cox  * The free page queues must be locked.
11847bfda801SAlan Cox  */
1185e35395ceSAlan Cox boolean_t
11867bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
11877bfda801SAlan Cox {
11887bfda801SAlan Cox 	struct vm_freelist *fl;
11897bfda801SAlan Cox 	struct vm_phys_seg *seg;
11907bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
11917bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
11927bfda801SAlan Cox 	int order;
11937bfda801SAlan Cox 
11947bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
11957bfda801SAlan Cox 
11967bfda801SAlan Cox 	/*
11977bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
11987bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
11997bfda801SAlan Cox 	 * assign it to "m_set".
12007bfda801SAlan Cox 	 */
12017bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
12027bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
1203bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
12047bfda801SAlan Cox 		order++;
12057bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
12062fbced65SAlan Cox 		if (pa >= seg->start)
12077bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
1208e35395ceSAlan Cox 		else
1209e35395ceSAlan Cox 			return (FALSE);
12107bfda801SAlan Cox 	}
1211e35395ceSAlan Cox 	if (m_set->order < order)
1212e35395ceSAlan Cox 		return (FALSE);
1213e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
1214e35395ceSAlan Cox 		return (FALSE);
12157bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
12167bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
12177bfda801SAlan Cox 	    m_set, m_set->order));
12187bfda801SAlan Cox 
12197bfda801SAlan Cox 	/*
12207bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
12217bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
12227bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
12237bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
12247bfda801SAlan Cox 	 */
12257bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
12267bfda801SAlan Cox 	order = m_set->order;
12277e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
12287bfda801SAlan Cox 	while (order > 0) {
12297bfda801SAlan Cox 		order--;
12307bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ ((vm_paddr_t)1 << (PAGE_SHIFT + order));
12317bfda801SAlan Cox 		if (m->phys_addr < pa_half)
12327bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
12337bfda801SAlan Cox 		else {
12347bfda801SAlan Cox 			m_tmp = m_set;
12357bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
12367bfda801SAlan Cox 		}
12377e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
12387bfda801SAlan Cox 	}
12397bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
1240e35395ceSAlan Cox 	return (TRUE);
12417bfda801SAlan Cox }
12427bfda801SAlan Cox 
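/*
 * Editorial note, not part of the original file: a worked example of the
 * splitting loop above, assuming 4KB pages.  Suppose "m" is at physical
 * address 0x6000 inside a free order-2 block ("m_set") at 0x4000.  At
 * order 1, pa_half = 0x4000 ^ 0x2000 = 0x6000; "m" lies at or above
 * pa_half, so the lower half at 0x4000 is returned to the free lists and
 * "m_set" advances to 0x6000.  At order 0, pa_half = 0x6000 ^ 0x1000 =
 * 0x7000; "m" lies below pa_half, so the page at 0x7000 is returned,
 * leaving "m_set" == "m" at 0x6000, as the final KASSERT checks.
 */
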
12437bfda801SAlan Cox /*
12447bfda801SAlan Cox  * Try to zero one physical page.  Used by an idle priority thread.
124511752d88SAlan Cox  */
124611752d88SAlan Cox boolean_t
124711752d88SAlan Cox vm_phys_zero_pages_idle(void)
124811752d88SAlan Cox {
12497e226537SAttilio Rao 	static struct vm_freelist *fl;
12507bfda801SAlan Cox 	static int flind, oind, pind;
125111752d88SAlan Cox 	vm_page_t m, m_tmp;
12527e226537SAttilio Rao 	int domain;
125311752d88SAlan Cox 
12547e226537SAttilio Rao 	domain = vm_rr_selectdomain();
12557e226537SAttilio Rao 	fl = vm_phys_free_queues[domain][0][0];
125611752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
12577bfda801SAlan Cox 	for (;;) {
1258c325e866SKonstantin Belousov 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
12597bfda801SAlan Cox 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
12607bfda801SAlan Cox 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
12617bfda801SAlan Cox 					vm_phys_unfree_page(m_tmp);
1262449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, -1);
126311752d88SAlan Cox 					mtx_unlock(&vm_page_queue_free_mtx);
126411752d88SAlan Cox 					pmap_zero_page_idle(m_tmp);
126511752d88SAlan Cox 					m_tmp->flags |= PG_ZERO;
126611752d88SAlan Cox 					mtx_lock(&vm_page_queue_free_mtx);
1267449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, 1);
12687bfda801SAlan Cox 					vm_phys_free_pages(m_tmp, 0);
12697bfda801SAlan Cox 					vm_page_zero_count++;
12707bfda801SAlan Cox 					cnt_prezero++;
127111752d88SAlan Cox 					return (TRUE);
127211752d88SAlan Cox 				}
127311752d88SAlan Cox 			}
127411752d88SAlan Cox 		}
12757bfda801SAlan Cox 		oind++;
12767bfda801SAlan Cox 		if (oind == VM_NFREEORDER) {
12777bfda801SAlan Cox 			oind = 0;
12787bfda801SAlan Cox 			pind++;
12797bfda801SAlan Cox 			if (pind == VM_NFREEPOOL) {
12807bfda801SAlan Cox 				pind = 0;
12817bfda801SAlan Cox 				flind++;
12827bfda801SAlan Cox 				if (flind == vm_nfreelists)
12837bfda801SAlan Cox 					flind = 0;
12847bfda801SAlan Cox 			}
12857e226537SAttilio Rao 			fl = vm_phys_free_queues[domain][flind][pind];
12867bfda801SAlan Cox 		}
12877bfda801SAlan Cox 	}
128811752d88SAlan Cox }
128911752d88SAlan Cox 
12902f9f48d6SAlan Cox /*
12912f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
12922f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
12932f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
12942f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
12952f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
12962f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
12972f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
129811752d88SAlan Cox  * "alignment" and "boundary" must be powers of two.
129911752d88SAlan Cox  */
130011752d88SAlan Cox vm_page_t
13015c1f2cc4SAlan Cox vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
13025c1f2cc4SAlan Cox     u_long alignment, vm_paddr_t boundary)
130311752d88SAlan Cox {
130411752d88SAlan Cox 	struct vm_freelist *fl;
130511752d88SAlan Cox 	struct vm_phys_seg *seg;
130611752d88SAlan Cox 	vm_paddr_t pa, pa_last, size;
1307fbd80bd0SAlan Cox 	vm_page_t m, m_ret;
13085c1f2cc4SAlan Cox 	u_long npages_end;
1309*6520495aSAdrian Chadd 	int domain, flind, oind, order, pind;
1310*6520495aSAdrian Chadd 	struct vm_domain_iterator vi;
131111752d88SAlan Cox 
1312fbd80bd0SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
131311752d88SAlan Cox 	size = npages << PAGE_SHIFT;
131411752d88SAlan Cox 	KASSERT(size != 0,
131511752d88SAlan Cox 	    ("vm_phys_alloc_contig: size must not be 0"));
131611752d88SAlan Cox 	KASSERT((alignment & (alignment - 1)) == 0,
131711752d88SAlan Cox 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
131811752d88SAlan Cox 	KASSERT((boundary & (boundary - 1)) == 0,
131911752d88SAlan Cox 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
132011752d88SAlan Cox 	/* Compute the queue that is the best fit for npages. */
132111752d88SAlan Cox 	for (order = 0; (1 << order) < npages; order++);
1322*6520495aSAdrian Chadd 
1323*6520495aSAdrian Chadd 	vm_policy_iterator_init(&vi);
1324*6520495aSAdrian Chadd 
13257e226537SAttilio Rao restartdom:
1326*6520495aSAdrian Chadd 	if (vm_domain_iterator_run(&vi, &domain) != 0) {
1327*6520495aSAdrian Chadd 		vm_policy_iterator_finish(&vi);
1328*6520495aSAdrian Chadd 		return (NULL);
1329*6520495aSAdrian Chadd 	}
1330*6520495aSAdrian Chadd 
133111752d88SAlan Cox 	for (flind = 0; flind < vm_nfreelists; flind++) {
133211752d88SAlan Cox 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
133311752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
13347e226537SAttilio Rao 				fl = &vm_phys_free_queues[domain][flind][pind][0];
1335c325e866SKonstantin Belousov 				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
133611752d88SAlan Cox 					/*
133711752d88SAlan Cox 					 * A free list may contain physical pages
133811752d88SAlan Cox 					 * from one or more segments.
133911752d88SAlan Cox 					 */
134011752d88SAlan Cox 					seg = &vm_phys_segs[m_ret->segind];
134111752d88SAlan Cox 					if (seg->start > high ||
134211752d88SAlan Cox 					    low >= seg->end)
134311752d88SAlan Cox 						continue;
134411752d88SAlan Cox 
134511752d88SAlan Cox 					/*
134611752d88SAlan Cox 					 * Is the size of this allocation request
134711752d88SAlan Cox 					 * larger than the largest block size?
134811752d88SAlan Cox 					 */
134911752d88SAlan Cox 					if (order >= VM_NFREEORDER) {
135011752d88SAlan Cox 						/*
135111752d88SAlan Cox 						 * Determine if a sufficient number
135211752d88SAlan Cox 						 * of subsequent blocks to satisfy
135311752d88SAlan Cox 						 * the allocation request are free.
135411752d88SAlan Cox 						 */
135511752d88SAlan Cox 						pa = VM_PAGE_TO_PHYS(m_ret);
135611752d88SAlan Cox 						pa_last = pa + size;
135711752d88SAlan Cox 						for (;;) {
135811752d88SAlan Cox 							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
135911752d88SAlan Cox 							if (pa >= pa_last)
136011752d88SAlan Cox 								break;
136111752d88SAlan Cox 							if (pa < seg->start ||
136211752d88SAlan Cox 							    pa >= seg->end)
136311752d88SAlan Cox 								break;
136411752d88SAlan Cox 							m = &seg->first_page[atop(pa - seg->start)];
136511752d88SAlan Cox 							if (m->order != VM_NFREEORDER - 1)
136611752d88SAlan Cox 								break;
136711752d88SAlan Cox 						}
136811752d88SAlan Cox 						/* If not, continue to the next block. */
136911752d88SAlan Cox 						if (pa < pa_last)
137011752d88SAlan Cox 							continue;
137111752d88SAlan Cox 					}
137211752d88SAlan Cox 
137311752d88SAlan Cox 					/*
137411752d88SAlan Cox 					 * Determine if the blocks are within the given range,
137511752d88SAlan Cox 					 * satisfy the given alignment, and do not cross the
137611752d88SAlan Cox 					 * given boundary.
137711752d88SAlan Cox 					 */
137811752d88SAlan Cox 					pa = VM_PAGE_TO_PHYS(m_ret);
137911752d88SAlan Cox 					if (pa >= low &&
138011752d88SAlan Cox 					    pa + size <= high &&
138111752d88SAlan Cox 					    (pa & (alignment - 1)) == 0 &&
138211752d88SAlan Cox 					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
138311752d88SAlan Cox 						goto done;
138411752d88SAlan Cox 				}
138511752d88SAlan Cox 			}
138611752d88SAlan Cox 		}
138711752d88SAlan Cox 	}
1388*6520495aSAdrian Chadd 	if (!vm_domain_iterator_isdone(&vi))
13897e226537SAttilio Rao 		goto restartdom;
1390*6520495aSAdrian Chadd 	vm_policy_iterator_finish(&vi);
139111752d88SAlan Cox 	return (NULL);
139211752d88SAlan Cox done:
139311752d88SAlan Cox 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
139411752d88SAlan Cox 		fl = (*seg->free_queues)[m->pool];
13957e226537SAttilio Rao 		vm_freelist_rem(fl, m, m->order);
139611752d88SAlan Cox 	}
139711752d88SAlan Cox 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
139811752d88SAlan Cox 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
139911752d88SAlan Cox 	fl = (*seg->free_queues)[m_ret->pool];
140011752d88SAlan Cox 	vm_phys_split_pages(m_ret, oind, fl, order);
14015c1f2cc4SAlan Cox 	/* Return excess pages to the free lists. */
14025c1f2cc4SAlan Cox 	npages_end = roundup2(npages, 1 << imin(oind, order));
14035c1f2cc4SAlan Cox 	if (npages < npages_end)
14045c1f2cc4SAlan Cox 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
140511752d88SAlan Cox 	return (m_ret);
140611752d88SAlan Cox }
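/*
 * Editorial sketch, not part of the original file: the range, alignment,
 * and boundary tests above are pure bit arithmetic.  "pa" is aligned when
 * its low bits are clear, and the run [pa, pa + size) stays inside one
 * boundary-sized window exactly when the first and last byte addresses
 * agree in every bit at or above log2(boundary).  The helper below is
 * hypothetical and only restates that check.
 */
#if 0
static boolean_t
vm_phys_range_ok(vm_paddr_t pa, vm_paddr_t size, u_long alignment,
    vm_paddr_t boundary)
{

	return ((pa & (alignment - 1)) == 0 &&
	    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0);
}
/*
 * Assuming a 64KB boundary and 16KB alignment:
 * vm_phys_range_ok(0xc000, 0x4000, 0x4000, 0x10000) is TRUE because the
 * run [0xc000, 0xffff] sits inside the window [0x0, 0xffff].  Growing
 * the run to 0x8000 bytes makes it FALSE: 0xc000 ^ 0x13fff differs at
 * bit 16, so the run crosses into the next window.
 */
#endif
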
140711752d88SAlan Cox 
140811752d88SAlan Cox #ifdef DDB
140911752d88SAlan Cox /*
141011752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
141111752d88SAlan Cox  */
141211752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages)
141311752d88SAlan Cox {
141411752d88SAlan Cox 	struct vm_freelist *fl;
14157e226537SAttilio Rao 	int flind, oind, pind, dom;
141611752d88SAlan Cox 
14177e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
14187e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
141911752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
142011752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
142111752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
142211752d88SAlan Cox 			    "\n              ", flind);
142311752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
142411752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
142511752d88SAlan Cox 			db_printf("\n--            ");
142611752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
142711752d88SAlan Cox 				db_printf("-- --      ");
142811752d88SAlan Cox 			db_printf("--\n");
142911752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
143011752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
143111752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
143211752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
14337e226537SAttilio Rao 					fl = vm_phys_free_queues[dom][flind][pind];
143411752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
143511752d88SAlan Cox 				}
143611752d88SAlan Cox 				db_printf("\n");
143711752d88SAlan Cox 			}
143811752d88SAlan Cox 			db_printf("\n");
143911752d88SAlan Cox 		}
14407e226537SAttilio Rao 		db_printf("\n");
14417e226537SAttilio Rao 	}
144211752d88SAlan Cox }
144311752d88SAlan Cox #endif
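/*
 * Editorial note, not part of the original file: DB_SHOW_COMMAND(freepages,
 * ...) registers "freepages" with the in-kernel debugger, so the table is
 * produced from the ddb prompt with:
 *
 *	db> show freepages
 *
 * For every domain and free list, it prints one row per order, highest
 * order first, with one column per pool; each entry is that pool's count
 * of free 2^order-page blocks (fl[oind].lcnt).  With 4KB pages, the
 * order-10 row, for example, corresponds to 4096KB blocks.
 */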