111752d88SAlan Cox /*- 211752d88SAlan Cox * Copyright (c) 2002-2006 Rice University 311752d88SAlan Cox * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 411752d88SAlan Cox * All rights reserved. 511752d88SAlan Cox * 611752d88SAlan Cox * This software was developed for the FreeBSD Project by Alan L. Cox, 711752d88SAlan Cox * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 811752d88SAlan Cox * 911752d88SAlan Cox * Redistribution and use in source and binary forms, with or without 1011752d88SAlan Cox * modification, are permitted provided that the following conditions 1111752d88SAlan Cox * are met: 1211752d88SAlan Cox * 1. Redistributions of source code must retain the above copyright 1311752d88SAlan Cox * notice, this list of conditions and the following disclaimer. 1411752d88SAlan Cox * 2. Redistributions in binary form must reproduce the above copyright 1511752d88SAlan Cox * notice, this list of conditions and the following disclaimer in the 1611752d88SAlan Cox * documentation and/or other materials provided with the distribution. 1711752d88SAlan Cox * 1811752d88SAlan Cox * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1911752d88SAlan Cox * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2011752d88SAlan Cox * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2111752d88SAlan Cox * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 2211752d88SAlan Cox * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2311752d88SAlan Cox * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 2411752d88SAlan Cox * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 2511752d88SAlan Cox * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 2611752d88SAlan Cox * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2711752d88SAlan Cox * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 2811752d88SAlan Cox * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 2911752d88SAlan Cox * POSSIBILITY OF SUCH DAMAGE. 3011752d88SAlan Cox */ 3111752d88SAlan Cox 32fbd80bd0SAlan Cox /* 33fbd80bd0SAlan Cox * Physical memory system implementation 34fbd80bd0SAlan Cox * 35fbd80bd0SAlan Cox * Any external functions defined by this module are only to be used by the 36fbd80bd0SAlan Cox * virtual memory system. 37fbd80bd0SAlan Cox */ 38fbd80bd0SAlan Cox 3911752d88SAlan Cox #include <sys/cdefs.h> 4011752d88SAlan Cox __FBSDID("$FreeBSD$"); 4111752d88SAlan Cox 4211752d88SAlan Cox #include "opt_ddb.h" 43174b5f38SJohn Baldwin #include "opt_vm.h" 4411752d88SAlan Cox 4511752d88SAlan Cox #include <sys/param.h> 4611752d88SAlan Cox #include <sys/systm.h> 4711752d88SAlan Cox #include <sys/lock.h> 4811752d88SAlan Cox #include <sys/kernel.h> 4911752d88SAlan Cox #include <sys/malloc.h> 5011752d88SAlan Cox #include <sys/mutex.h> 517e226537SAttilio Rao #if MAXMEMDOM > 1 527e226537SAttilio Rao #include <sys/proc.h> 537e226537SAttilio Rao #endif 5411752d88SAlan Cox #include <sys/queue.h> 55*38d6b2dcSRoger Pau Monné #include <sys/rwlock.h> 5611752d88SAlan Cox #include <sys/sbuf.h> 5711752d88SAlan Cox #include <sys/sysctl.h> 58*38d6b2dcSRoger Pau Monné #include <sys/tree.h> 5911752d88SAlan Cox #include <sys/vmmeter.h> 6011752d88SAlan Cox 6111752d88SAlan Cox #include <ddb/ddb.h> 6211752d88SAlan Cox 6311752d88SAlan Cox #include 
<vm/vm.h> 6411752d88SAlan Cox #include <vm/vm_param.h> 6511752d88SAlan Cox #include <vm/vm_kern.h> 6611752d88SAlan Cox #include <vm/vm_object.h> 6711752d88SAlan Cox #include <vm/vm_page.h> 6811752d88SAlan Cox #include <vm/vm_phys.h> 6911752d88SAlan Cox 70449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, 71449c2e92SKonstantin Belousov "Too many physsegs."); 7211752d88SAlan Cox 73a3870a18SJohn Baldwin struct mem_affinity *mem_affinity; 74a3870a18SJohn Baldwin 757e226537SAttilio Rao int vm_ndomains = 1; 767e226537SAttilio Rao 77449c2e92SKonstantin Belousov struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; 78449c2e92SKonstantin Belousov int vm_phys_nsegs; 7911752d88SAlan Cox 80*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg; 81*38d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, 82*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *); 83*38d6b2dcSRoger Pau Monné 84*38d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = 85*38d6b2dcSRoger Pau Monné RB_INITIALIZER(_vm_phys_fictitious_tree); 86*38d6b2dcSRoger Pau Monné 87*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg { 88*38d6b2dcSRoger Pau Monné RB_ENTRY(vm_phys_fictitious_seg) node; 89*38d6b2dcSRoger Pau Monné /* Memory region data */ 90b6de32bdSKonstantin Belousov vm_paddr_t start; 91b6de32bdSKonstantin Belousov vm_paddr_t end; 92b6de32bdSKonstantin Belousov vm_page_t first_page; 93*38d6b2dcSRoger Pau Monné }; 94*38d6b2dcSRoger Pau Monné 95*38d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, 96*38d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp); 97*38d6b2dcSRoger Pau Monné 98*38d6b2dcSRoger Pau Monné static struct rwlock vm_phys_fictitious_reg_lock; 99c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); 100b6de32bdSKonstantin Belousov 10111752d88SAlan Cox static struct vm_freelist 1027e226537SAttilio Rao 
vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; 10311752d88SAlan Cox 10411752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1; 10511752d88SAlan Cox 10611752d88SAlan Cox static int cnt_prezero; 10711752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD, 10811752d88SAlan Cox &cnt_prezero, 0, "The number of physical pages prezeroed at idle time"); 10911752d88SAlan Cox 11011752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 11111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 11211752d88SAlan Cox NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 11311752d88SAlan Cox 11411752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 11511752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 11611752d88SAlan Cox NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 11711752d88SAlan Cox 1187e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, 1197e226537SAttilio Rao &vm_ndomains, 0, "Number of physical memory domains available."); 120a3870a18SJohn Baldwin 121f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, 122f5c4b077SJohn Baldwin int order); 123a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, 124a3870a18SJohn Baldwin int domain); 12511752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind); 12611752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa); 12711752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 12811752d88SAlan Cox int order); 12911752d88SAlan Cox 130*38d6b2dcSRoger Pau Monné /* 131*38d6b2dcSRoger Pau Monné * Red-black tree helpers for vm fictitious range management. 
132*38d6b2dcSRoger Pau Monné */ 133*38d6b2dcSRoger Pau Monné static inline int 134*38d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, 135*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *range) 136*38d6b2dcSRoger Pau Monné { 137*38d6b2dcSRoger Pau Monné 138*38d6b2dcSRoger Pau Monné KASSERT(range->start != 0 && range->end != 0, 139*38d6b2dcSRoger Pau Monné ("Invalid range passed on search for vm_fictitious page")); 140*38d6b2dcSRoger Pau Monné if (p->start >= range->end) 141*38d6b2dcSRoger Pau Monné return (1); 142*38d6b2dcSRoger Pau Monné if (p->start < range->start) 143*38d6b2dcSRoger Pau Monné return (-1); 144*38d6b2dcSRoger Pau Monné 145*38d6b2dcSRoger Pau Monné return (0); 146*38d6b2dcSRoger Pau Monné } 147*38d6b2dcSRoger Pau Monné 148*38d6b2dcSRoger Pau Monné static int 149*38d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, 150*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *p2) 151*38d6b2dcSRoger Pau Monné { 152*38d6b2dcSRoger Pau Monné 153*38d6b2dcSRoger Pau Monné /* Check if this is a search for a page */ 154*38d6b2dcSRoger Pau Monné if (p1->end == 0) 155*38d6b2dcSRoger Pau Monné return (vm_phys_fictitious_in_range(p1, p2)); 156*38d6b2dcSRoger Pau Monné 157*38d6b2dcSRoger Pau Monné KASSERT(p2->end != 0, 158*38d6b2dcSRoger Pau Monné ("Invalid range passed as second parameter to vm fictitious comparison")); 159*38d6b2dcSRoger Pau Monné 160*38d6b2dcSRoger Pau Monné /* Searching to add a new range */ 161*38d6b2dcSRoger Pau Monné if (p1->end <= p2->start) 162*38d6b2dcSRoger Pau Monné return (-1); 163*38d6b2dcSRoger Pau Monné if (p1->start >= p2->end) 164*38d6b2dcSRoger Pau Monné return (1); 165*38d6b2dcSRoger Pau Monné 166*38d6b2dcSRoger Pau Monné panic("Trying to add overlapping vm fictitious ranges:\n" 167*38d6b2dcSRoger Pau Monné "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start, 168*38d6b2dcSRoger Pau Monné (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); 
169*38d6b2dcSRoger Pau Monné } 170*38d6b2dcSRoger Pau Monné 1717e226537SAttilio Rao static __inline int 1727e226537SAttilio Rao vm_rr_selectdomain(void) 1737e226537SAttilio Rao { 1747e226537SAttilio Rao #if MAXMEMDOM > 1 1757e226537SAttilio Rao struct thread *td; 1767e226537SAttilio Rao 1777e226537SAttilio Rao td = curthread; 1787e226537SAttilio Rao 1797e226537SAttilio Rao td->td_dom_rr_idx++; 1807e226537SAttilio Rao td->td_dom_rr_idx %= vm_ndomains; 1817e226537SAttilio Rao return (td->td_dom_rr_idx); 1827e226537SAttilio Rao #else 1837e226537SAttilio Rao return (0); 1847e226537SAttilio Rao #endif 1857e226537SAttilio Rao } 1867e226537SAttilio Rao 187449c2e92SKonstantin Belousov boolean_t 188449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) 189449c2e92SKonstantin Belousov { 190449c2e92SKonstantin Belousov struct vm_phys_seg *s; 191449c2e92SKonstantin Belousov int idx; 192449c2e92SKonstantin Belousov 193449c2e92SKonstantin Belousov while ((idx = ffsl(mask)) != 0) { 194449c2e92SKonstantin Belousov idx--; /* ffsl counts from 1 */ 195449c2e92SKonstantin Belousov mask &= ~(1UL << idx); 196449c2e92SKonstantin Belousov s = &vm_phys_segs[idx]; 197449c2e92SKonstantin Belousov if (low < s->end && high > s->start) 198449c2e92SKonstantin Belousov return (TRUE); 199449c2e92SKonstantin Belousov } 200449c2e92SKonstantin Belousov return (FALSE); 201449c2e92SKonstantin Belousov } 202449c2e92SKonstantin Belousov 20311752d88SAlan Cox /* 20411752d88SAlan Cox * Outputs the state of the physical memory allocator, specifically, 20511752d88SAlan Cox * the amount of physical memory in each free list. 
20611752d88SAlan Cox */ 20711752d88SAlan Cox static int 20811752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 20911752d88SAlan Cox { 21011752d88SAlan Cox struct sbuf sbuf; 21111752d88SAlan Cox struct vm_freelist *fl; 2127e226537SAttilio Rao int dom, error, flind, oind, pind; 21311752d88SAlan Cox 21400f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 21500f0e671SMatthew D Fleming if (error != 0) 21600f0e671SMatthew D Fleming return (error); 2177e226537SAttilio Rao sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); 2187e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 219eb2f42fbSAlan Cox sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); 22011752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 221eb2f42fbSAlan Cox sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 22211752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 22311752d88SAlan Cox "\n ", flind); 22411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 22511752d88SAlan Cox sbuf_printf(&sbuf, " | POOL %d", pind); 22611752d88SAlan Cox sbuf_printf(&sbuf, "\n-- "); 22711752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 22811752d88SAlan Cox sbuf_printf(&sbuf, "-- -- "); 22911752d88SAlan Cox sbuf_printf(&sbuf, "--\n"); 23011752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 231d689bc00SAlan Cox sbuf_printf(&sbuf, " %2d (%6dK)", oind, 23211752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 23311752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 2347e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 235eb2f42fbSAlan Cox sbuf_printf(&sbuf, " | %6d", 2367e226537SAttilio Rao fl[oind].lcnt); 23711752d88SAlan Cox } 23811752d88SAlan Cox sbuf_printf(&sbuf, "\n"); 23911752d88SAlan Cox } 2407e226537SAttilio Rao } 24111752d88SAlan Cox } 2424e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 24311752d88SAlan Cox sbuf_delete(&sbuf); 24411752d88SAlan Cox return (error); 24511752d88SAlan Cox } 24611752d88SAlan Cox 24711752d88SAlan Cox /* 
24811752d88SAlan Cox * Outputs the set of physical memory segments. 24911752d88SAlan Cox */ 25011752d88SAlan Cox static int 25111752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 25211752d88SAlan Cox { 25311752d88SAlan Cox struct sbuf sbuf; 25411752d88SAlan Cox struct vm_phys_seg *seg; 25511752d88SAlan Cox int error, segind; 25611752d88SAlan Cox 25700f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 25800f0e671SMatthew D Fleming if (error != 0) 25900f0e671SMatthew D Fleming return (error); 2604e657159SMatthew D Fleming sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 26111752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 26211752d88SAlan Cox sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 26311752d88SAlan Cox seg = &vm_phys_segs[segind]; 26411752d88SAlan Cox sbuf_printf(&sbuf, "start: %#jx\n", 26511752d88SAlan Cox (uintmax_t)seg->start); 26611752d88SAlan Cox sbuf_printf(&sbuf, "end: %#jx\n", 26711752d88SAlan Cox (uintmax_t)seg->end); 268a3870a18SJohn Baldwin sbuf_printf(&sbuf, "domain: %d\n", seg->domain); 26911752d88SAlan Cox sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 27011752d88SAlan Cox } 2714e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 27211752d88SAlan Cox sbuf_delete(&sbuf); 27311752d88SAlan Cox return (error); 27411752d88SAlan Cox } 27511752d88SAlan Cox 2767e226537SAttilio Rao static void 2777e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) 278a3870a18SJohn Baldwin { 279a3870a18SJohn Baldwin 2807e226537SAttilio Rao m->order = order; 2817e226537SAttilio Rao if (tail) 282c325e866SKonstantin Belousov TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q); 2837e226537SAttilio Rao else 284c325e866SKonstantin Belousov TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q); 2857e226537SAttilio Rao fl[order].lcnt++; 286a3870a18SJohn Baldwin } 2877e226537SAttilio Rao 2887e226537SAttilio Rao static void 2897e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int 
order) 2907e226537SAttilio Rao { 2917e226537SAttilio Rao 292c325e866SKonstantin Belousov TAILQ_REMOVE(&fl[order].pl, m, plinks.q); 2937e226537SAttilio Rao fl[order].lcnt--; 2947e226537SAttilio Rao m->order = VM_NFREEORDER; 295a3870a18SJohn Baldwin } 296a3870a18SJohn Baldwin 29711752d88SAlan Cox /* 29811752d88SAlan Cox * Create a physical memory segment. 29911752d88SAlan Cox */ 30011752d88SAlan Cox static void 301a3870a18SJohn Baldwin _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain) 30211752d88SAlan Cox { 30311752d88SAlan Cox struct vm_phys_seg *seg; 30411752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE 305d6e9b97bSJohn Baldwin long pages; 30611752d88SAlan Cox int segind; 30711752d88SAlan Cox 30811752d88SAlan Cox pages = 0; 30911752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 31011752d88SAlan Cox seg = &vm_phys_segs[segind]; 31111752d88SAlan Cox pages += atop(seg->end - seg->start); 31211752d88SAlan Cox } 31311752d88SAlan Cox #endif 31411752d88SAlan Cox KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 31511752d88SAlan Cox ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 3167e226537SAttilio Rao KASSERT(domain < vm_ndomains, 3177e226537SAttilio Rao ("vm_phys_create_seg: invalid domain provided")); 31811752d88SAlan Cox seg = &vm_phys_segs[vm_phys_nsegs++]; 31911752d88SAlan Cox seg->start = start; 32011752d88SAlan Cox seg->end = end; 321a3870a18SJohn Baldwin seg->domain = domain; 32211752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE 32311752d88SAlan Cox seg->first_page = &vm_page_array[pages]; 32411752d88SAlan Cox #else 32511752d88SAlan Cox seg->first_page = PHYS_TO_VM_PAGE(start); 32611752d88SAlan Cox #endif 3277e226537SAttilio Rao seg->free_queues = &vm_phys_free_queues[domain][flind]; 32811752d88SAlan Cox } 32911752d88SAlan Cox 330a3870a18SJohn Baldwin static void 331a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) 332a3870a18SJohn Baldwin { 333a3870a18SJohn Baldwin int i; 334a3870a18SJohn Baldwin 
335a3870a18SJohn Baldwin if (mem_affinity == NULL) { 336a3870a18SJohn Baldwin _vm_phys_create_seg(start, end, flind, 0); 337a3870a18SJohn Baldwin return; 338a3870a18SJohn Baldwin } 339a3870a18SJohn Baldwin 340a3870a18SJohn Baldwin for (i = 0;; i++) { 341a3870a18SJohn Baldwin if (mem_affinity[i].end == 0) 342a3870a18SJohn Baldwin panic("Reached end of affinity info"); 343a3870a18SJohn Baldwin if (mem_affinity[i].end <= start) 344a3870a18SJohn Baldwin continue; 345a3870a18SJohn Baldwin if (mem_affinity[i].start > start) 346a3870a18SJohn Baldwin panic("No affinity info for start %jx", 347a3870a18SJohn Baldwin (uintmax_t)start); 348a3870a18SJohn Baldwin if (mem_affinity[i].end >= end) { 349a3870a18SJohn Baldwin _vm_phys_create_seg(start, end, flind, 350a3870a18SJohn Baldwin mem_affinity[i].domain); 351a3870a18SJohn Baldwin break; 352a3870a18SJohn Baldwin } 353a3870a18SJohn Baldwin _vm_phys_create_seg(start, mem_affinity[i].end, flind, 354a3870a18SJohn Baldwin mem_affinity[i].domain); 355a3870a18SJohn Baldwin start = mem_affinity[i].end; 356a3870a18SJohn Baldwin } 357a3870a18SJohn Baldwin } 358a3870a18SJohn Baldwin 35911752d88SAlan Cox /* 36011752d88SAlan Cox * Initialize the physical memory allocator. 
36111752d88SAlan Cox */ 36211752d88SAlan Cox void 36311752d88SAlan Cox vm_phys_init(void) 36411752d88SAlan Cox { 36511752d88SAlan Cox struct vm_freelist *fl; 3667e226537SAttilio Rao int dom, flind, i, oind, pind; 36711752d88SAlan Cox 36811752d88SAlan Cox for (i = 0; phys_avail[i + 1] != 0; i += 2) { 36911752d88SAlan Cox #ifdef VM_FREELIST_ISADMA 37011752d88SAlan Cox if (phys_avail[i] < 16777216) { 37111752d88SAlan Cox if (phys_avail[i + 1] > 16777216) { 37211752d88SAlan Cox vm_phys_create_seg(phys_avail[i], 16777216, 37311752d88SAlan Cox VM_FREELIST_ISADMA); 37411752d88SAlan Cox vm_phys_create_seg(16777216, phys_avail[i + 1], 37511752d88SAlan Cox VM_FREELIST_DEFAULT); 37611752d88SAlan Cox } else { 37711752d88SAlan Cox vm_phys_create_seg(phys_avail[i], 37811752d88SAlan Cox phys_avail[i + 1], VM_FREELIST_ISADMA); 37911752d88SAlan Cox } 38011752d88SAlan Cox if (VM_FREELIST_ISADMA >= vm_nfreelists) 38111752d88SAlan Cox vm_nfreelists = VM_FREELIST_ISADMA + 1; 38211752d88SAlan Cox } else 38311752d88SAlan Cox #endif 38411752d88SAlan Cox #ifdef VM_FREELIST_HIGHMEM 38511752d88SAlan Cox if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) { 38611752d88SAlan Cox if (phys_avail[i] < VM_HIGHMEM_ADDRESS) { 38711752d88SAlan Cox vm_phys_create_seg(phys_avail[i], 38811752d88SAlan Cox VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT); 38911752d88SAlan Cox vm_phys_create_seg(VM_HIGHMEM_ADDRESS, 39011752d88SAlan Cox phys_avail[i + 1], VM_FREELIST_HIGHMEM); 39111752d88SAlan Cox } else { 39211752d88SAlan Cox vm_phys_create_seg(phys_avail[i], 39311752d88SAlan Cox phys_avail[i + 1], VM_FREELIST_HIGHMEM); 39411752d88SAlan Cox } 39511752d88SAlan Cox if (VM_FREELIST_HIGHMEM >= vm_nfreelists) 39611752d88SAlan Cox vm_nfreelists = VM_FREELIST_HIGHMEM + 1; 39711752d88SAlan Cox } else 39811752d88SAlan Cox #endif 39911752d88SAlan Cox vm_phys_create_seg(phys_avail[i], phys_avail[i + 1], 40011752d88SAlan Cox VM_FREELIST_DEFAULT); 40111752d88SAlan Cox } 4027e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 
40311752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 40411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 4057e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 40611752d88SAlan Cox for (oind = 0; oind < VM_NFREEORDER; oind++) 40711752d88SAlan Cox TAILQ_INIT(&fl[oind].pl); 40811752d88SAlan Cox } 40911752d88SAlan Cox } 410a3870a18SJohn Baldwin } 411*38d6b2dcSRoger Pau Monné rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); 41211752d88SAlan Cox } 41311752d88SAlan Cox 41411752d88SAlan Cox /* 41511752d88SAlan Cox * Split a contiguous, power of two-sized set of physical pages. 41611752d88SAlan Cox */ 41711752d88SAlan Cox static __inline void 41811752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 41911752d88SAlan Cox { 42011752d88SAlan Cox vm_page_t m_buddy; 42111752d88SAlan Cox 42211752d88SAlan Cox while (oind > order) { 42311752d88SAlan Cox oind--; 42411752d88SAlan Cox m_buddy = &m[1 << oind]; 42511752d88SAlan Cox KASSERT(m_buddy->order == VM_NFREEORDER, 42611752d88SAlan Cox ("vm_phys_split_pages: page %p has unexpected order %d", 42711752d88SAlan Cox m_buddy, m_buddy->order)); 4287e226537SAttilio Rao vm_freelist_add(fl, m_buddy, oind, 0); 42911752d88SAlan Cox } 43011752d88SAlan Cox } 43111752d88SAlan Cox 43211752d88SAlan Cox /* 43311752d88SAlan Cox * Initialize a physical page and add it to the free lists. 
43411752d88SAlan Cox */ 43511752d88SAlan Cox void 43611752d88SAlan Cox vm_phys_add_page(vm_paddr_t pa) 43711752d88SAlan Cox { 43811752d88SAlan Cox vm_page_t m; 439449c2e92SKonstantin Belousov struct vm_domain *vmd; 44011752d88SAlan Cox 44144f1c916SBryan Drewery vm_cnt.v_page_count++; 44211752d88SAlan Cox m = vm_phys_paddr_to_vm_page(pa); 44311752d88SAlan Cox m->phys_addr = pa; 44444e46b9eSAlan Cox m->queue = PQ_NONE; 44511752d88SAlan Cox m->segind = vm_phys_paddr_to_segind(pa); 446449c2e92SKonstantin Belousov vmd = vm_phys_domain(m); 447449c2e92SKonstantin Belousov vmd->vmd_page_count++; 448449c2e92SKonstantin Belousov vmd->vmd_segs |= 1UL << m->segind; 44911752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER, 45011752d88SAlan Cox ("vm_phys_add_page: page %p has unexpected order %d", 45111752d88SAlan Cox m, m->order)); 45211752d88SAlan Cox m->pool = VM_FREEPOOL_DEFAULT; 45311752d88SAlan Cox pmap_page_init(m); 4548941dc44SAlan Cox mtx_lock(&vm_page_queue_free_mtx); 455449c2e92SKonstantin Belousov vm_phys_freecnt_adj(m, 1); 45611752d88SAlan Cox vm_phys_free_pages(m, 0); 4578941dc44SAlan Cox mtx_unlock(&vm_page_queue_free_mtx); 45811752d88SAlan Cox } 45911752d88SAlan Cox 46011752d88SAlan Cox /* 46111752d88SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages 46211752d88SAlan Cox * from the free lists. 4638941dc44SAlan Cox * 4648941dc44SAlan Cox * The free page queues must be locked. 46511752d88SAlan Cox */ 46611752d88SAlan Cox vm_page_t 46711752d88SAlan Cox vm_phys_alloc_pages(int pool, int order) 46811752d88SAlan Cox { 46949ca10d4SJayachandran C. vm_page_t m; 4707e226537SAttilio Rao int dom, domain, flind; 47149ca10d4SJayachandran C. 
472f5c4b077SJohn Baldwin KASSERT(pool < VM_NFREEPOOL, 473f5c4b077SJohn Baldwin ("vm_phys_alloc_pages: pool %d is out of range", pool)); 474f5c4b077SJohn Baldwin KASSERT(order < VM_NFREEORDER, 475f5c4b077SJohn Baldwin ("vm_phys_alloc_pages: order %d is out of range", order)); 476f5c4b077SJohn Baldwin 4777e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 4787e226537SAttilio Rao domain = vm_rr_selectdomain(); 47949ca10d4SJayachandran C. for (flind = 0; flind < vm_nfreelists; flind++) { 4807e226537SAttilio Rao m = vm_phys_alloc_domain_pages(domain, flind, pool, 4817e226537SAttilio Rao order); 48249ca10d4SJayachandran C. if (m != NULL) 48349ca10d4SJayachandran C. return (m); 48449ca10d4SJayachandran C. } 4857e226537SAttilio Rao } 48649ca10d4SJayachandran C. return (NULL); 48749ca10d4SJayachandran C. } 48849ca10d4SJayachandran C. 48949ca10d4SJayachandran C. /* 49049ca10d4SJayachandran C. * Find and dequeue a free page on the given free list, with the 49149ca10d4SJayachandran C. * specified pool and order 49249ca10d4SJayachandran C. */ 49349ca10d4SJayachandran C. vm_page_t 49449ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order) 49549ca10d4SJayachandran C. { 49611752d88SAlan Cox vm_page_t m; 4977e226537SAttilio Rao int dom, domain; 49811752d88SAlan Cox 49949ca10d4SJayachandran C. KASSERT(flind < VM_NFREELIST, 50049ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind)); 50111752d88SAlan Cox KASSERT(pool < VM_NFREEPOOL, 50249ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); 50311752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 50449ca10d4SJayachandran C. 
("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 505a3870a18SJohn Baldwin 5067e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 5077e226537SAttilio Rao domain = vm_rr_selectdomain(); 5087e226537SAttilio Rao m = vm_phys_alloc_domain_pages(domain, flind, pool, order); 509f5c4b077SJohn Baldwin if (m != NULL) 510f5c4b077SJohn Baldwin return (m); 5117e226537SAttilio Rao } 5127e226537SAttilio Rao return (NULL); 513f5c4b077SJohn Baldwin } 514f5c4b077SJohn Baldwin 515f5c4b077SJohn Baldwin static vm_page_t 516f5c4b077SJohn Baldwin vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order) 517f5c4b077SJohn Baldwin { 518f5c4b077SJohn Baldwin struct vm_freelist *fl; 519f5c4b077SJohn Baldwin struct vm_freelist *alt; 520f5c4b077SJohn Baldwin int oind, pind; 521f5c4b077SJohn Baldwin vm_page_t m; 522f5c4b077SJohn Baldwin 52311752d88SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 5247e226537SAttilio Rao fl = &vm_phys_free_queues[domain][flind][pool][0]; 52511752d88SAlan Cox for (oind = order; oind < VM_NFREEORDER; oind++) { 52611752d88SAlan Cox m = TAILQ_FIRST(&fl[oind].pl); 52711752d88SAlan Cox if (m != NULL) { 5287e226537SAttilio Rao vm_freelist_rem(fl, m, oind); 52911752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 53011752d88SAlan Cox return (m); 53111752d88SAlan Cox } 53211752d88SAlan Cox } 53311752d88SAlan Cox 53411752d88SAlan Cox /* 53511752d88SAlan Cox * The given pool was empty. Find the largest 53611752d88SAlan Cox * contiguous, power-of-two-sized set of pages in any 53711752d88SAlan Cox * pool. Transfer these pages to the given pool, and 53811752d88SAlan Cox * use them to satisfy the allocation. 
53911752d88SAlan Cox */ 54011752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 54111752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 5427e226537SAttilio Rao alt = &vm_phys_free_queues[domain][flind][pind][0]; 54311752d88SAlan Cox m = TAILQ_FIRST(&alt[oind].pl); 54411752d88SAlan Cox if (m != NULL) { 5457e226537SAttilio Rao vm_freelist_rem(alt, m, oind); 54611752d88SAlan Cox vm_phys_set_pool(pool, m, oind); 54711752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 54811752d88SAlan Cox return (m); 54911752d88SAlan Cox } 55011752d88SAlan Cox } 55111752d88SAlan Cox } 55211752d88SAlan Cox return (NULL); 55311752d88SAlan Cox } 55411752d88SAlan Cox 55511752d88SAlan Cox /* 55611752d88SAlan Cox * Find the vm_page corresponding to the given physical address. 55711752d88SAlan Cox */ 55811752d88SAlan Cox vm_page_t 55911752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa) 56011752d88SAlan Cox { 56111752d88SAlan Cox struct vm_phys_seg *seg; 56211752d88SAlan Cox int segind; 56311752d88SAlan Cox 56411752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 56511752d88SAlan Cox seg = &vm_phys_segs[segind]; 56611752d88SAlan Cox if (pa >= seg->start && pa < seg->end) 56711752d88SAlan Cox return (&seg->first_page[atop(pa - seg->start)]); 56811752d88SAlan Cox } 569f06a3a36SAndrew Thompson return (NULL); 57011752d88SAlan Cox } 57111752d88SAlan Cox 572b6de32bdSKonstantin Belousov vm_page_t 573b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa) 574b6de32bdSKonstantin Belousov { 575*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg tmp, *seg; 576b6de32bdSKonstantin Belousov vm_page_t m; 577b6de32bdSKonstantin Belousov 578b6de32bdSKonstantin Belousov m = NULL; 579*38d6b2dcSRoger Pau Monné tmp.start = pa; 580*38d6b2dcSRoger Pau Monné tmp.end = 0; 581*38d6b2dcSRoger Pau Monné 582*38d6b2dcSRoger Pau Monné rw_rlock(&vm_phys_fictitious_reg_lock); 583*38d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, 
&vm_phys_fictitious_tree, &tmp); 584*38d6b2dcSRoger Pau Monné rw_runlock(&vm_phys_fictitious_reg_lock); 585*38d6b2dcSRoger Pau Monné if (seg == NULL) 586*38d6b2dcSRoger Pau Monné return (NULL); 587*38d6b2dcSRoger Pau Monné 588b6de32bdSKonstantin Belousov m = &seg->first_page[atop(pa - seg->start)]; 589*38d6b2dcSRoger Pau Monné KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); 590*38d6b2dcSRoger Pau Monné 591b6de32bdSKonstantin Belousov return (m); 592b6de32bdSKonstantin Belousov } 593b6de32bdSKonstantin Belousov 594b6de32bdSKonstantin Belousov int 595b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, 596b6de32bdSKonstantin Belousov vm_memattr_t memattr) 597b6de32bdSKonstantin Belousov { 598b6de32bdSKonstantin Belousov struct vm_phys_fictitious_seg *seg; 599b6de32bdSKonstantin Belousov vm_page_t fp; 600b6de32bdSKonstantin Belousov long i, page_count; 601b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 602b6de32bdSKonstantin Belousov long pi; 603b6de32bdSKonstantin Belousov #endif 604b6de32bdSKonstantin Belousov 605b6de32bdSKonstantin Belousov page_count = (end - start) / PAGE_SIZE; 606b6de32bdSKonstantin Belousov 607b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 608b6de32bdSKonstantin Belousov pi = atop(start); 609a17937bdSKonstantin Belousov if (pi >= first_page && pi < vm_page_array_size + first_page) { 610a17937bdSKonstantin Belousov if (atop(end) >= vm_page_array_size + first_page) 611a17937bdSKonstantin Belousov return (EINVAL); 612b6de32bdSKonstantin Belousov fp = &vm_page_array[pi - first_page]; 613b6de32bdSKonstantin Belousov } else 614b6de32bdSKonstantin Belousov #endif 615b6de32bdSKonstantin Belousov { 616b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, 617b6de32bdSKonstantin Belousov M_WAITOK | M_ZERO); 618b6de32bdSKonstantin Belousov } 619b6de32bdSKonstantin Belousov for (i = 0; i < page_count; i++) { 620b6de32bdSKonstantin Belousov 
vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr); 621c7aebda8SAttilio Rao fp[i].oflags &= ~VPO_UNMANAGED; 622c7aebda8SAttilio Rao fp[i].busy_lock = VPB_UNBUSIED; 623b6de32bdSKonstantin Belousov } 624*38d6b2dcSRoger Pau Monné 625*38d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); 626b6de32bdSKonstantin Belousov seg->start = start; 627b6de32bdSKonstantin Belousov seg->end = end; 628b6de32bdSKonstantin Belousov seg->first_page = fp; 629*38d6b2dcSRoger Pau Monné 630*38d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 631*38d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); 632*38d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 633*38d6b2dcSRoger Pau Monné 634b6de32bdSKonstantin Belousov return (0); 635b6de32bdSKonstantin Belousov } 636b6de32bdSKonstantin Belousov 637b6de32bdSKonstantin Belousov void 638b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) 639b6de32bdSKonstantin Belousov { 640*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp; 641b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 642b6de32bdSKonstantin Belousov long pi; 643b6de32bdSKonstantin Belousov #endif 644b6de32bdSKonstantin Belousov 645b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 646b6de32bdSKonstantin Belousov pi = atop(start); 647b6de32bdSKonstantin Belousov #endif 648*38d6b2dcSRoger Pau Monné tmp.start = start; 649*38d6b2dcSRoger Pau Monné tmp.end = 0; 650b6de32bdSKonstantin Belousov 651*38d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 652*38d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 653*38d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 654*38d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 655*38d6b2dcSRoger Pau Monné panic( 656*38d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 657*38d6b2dcSRoger Pau Monné 
(uintmax_t)start, (uintmax_t)end); 658*38d6b2dcSRoger Pau Monné } 659*38d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 660*38d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 661b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE 662b6de32bdSKonstantin Belousov if (pi < first_page || atop(end) >= vm_page_array_size) 663b6de32bdSKonstantin Belousov #endif 664*38d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 665*38d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 666b6de32bdSKonstantin Belousov } 667b6de32bdSKonstantin Belousov 66811752d88SAlan Cox /* 66911752d88SAlan Cox * Find the segment containing the given physical address. 67011752d88SAlan Cox */ 67111752d88SAlan Cox static int 67211752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa) 67311752d88SAlan Cox { 67411752d88SAlan Cox struct vm_phys_seg *seg; 67511752d88SAlan Cox int segind; 67611752d88SAlan Cox 67711752d88SAlan Cox for (segind = 0; segind < vm_phys_nsegs; segind++) { 67811752d88SAlan Cox seg = &vm_phys_segs[segind]; 67911752d88SAlan Cox if (pa >= seg->start && pa < seg->end) 68011752d88SAlan Cox return (segind); 68111752d88SAlan Cox } 68211752d88SAlan Cox panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , 68311752d88SAlan Cox (uintmax_t)pa); 68411752d88SAlan Cox } 68511752d88SAlan Cox 68611752d88SAlan Cox /* 68711752d88SAlan Cox * Free a contiguous, power of two-sized set of physical pages. 6888941dc44SAlan Cox * 6898941dc44SAlan Cox * The free page queues must be locked. 
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	/* "m" must not already be on a free list. */
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		/*
		 * Coalesce with free buddies, doubling the block size at
		 * each step, until the maximum order is reached or a buddy
		 * is unavailable.
		 */
		pa = VM_PAGE_TO_PHYS(m);
		do {
			/*
			 * The buddy's address differs from "pa" only in the
			 * bit corresponding to the current order.
			 */
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			/* The buddy must lie within the same segment. */
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			/*
			 * The buddy participates only if it is itself a free
			 * block of exactly the current order.
			 */
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			/* Merged blocks must agree on their pool. */
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			/*
			 * Clear the low-order bits so that "m" becomes the
			 * first page of the newly merged, larger block.
			 */
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	/* Place the final (possibly merged) block on its free list. */
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

73111752d88SAlan Cox /* 7325c1f2cc4SAlan Cox * Free a contiguous, arbitrarily sized set of physical pages. 7335c1f2cc4SAlan Cox * 7345c1f2cc4SAlan Cox * The free page queues must be locked. 7355c1f2cc4SAlan Cox */ 7365c1f2cc4SAlan Cox void 7375c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages) 7385c1f2cc4SAlan Cox { 7395c1f2cc4SAlan Cox u_int n; 7405c1f2cc4SAlan Cox int order; 7415c1f2cc4SAlan Cox 7425c1f2cc4SAlan Cox /* 7435c1f2cc4SAlan Cox * Avoid unnecessary coalescing by freeing the pages in the largest 7445c1f2cc4SAlan Cox * possible power-of-two-sized subsets. 7455c1f2cc4SAlan Cox */ 7465c1f2cc4SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 7475c1f2cc4SAlan Cox for (;; npages -= n) { 7485c1f2cc4SAlan Cox /* 7495c1f2cc4SAlan Cox * Unsigned "min" is used here so that "order" is assigned 7505c1f2cc4SAlan Cox * "VM_NFREEORDER - 1" when "m"'s physical address is zero 7515c1f2cc4SAlan Cox * or the low-order bits of its physical address are zero 7525c1f2cc4SAlan Cox * because the size of a physical address exceeds the size of 7535c1f2cc4SAlan Cox * a long. 7545c1f2cc4SAlan Cox */ 7555c1f2cc4SAlan Cox order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, 7565c1f2cc4SAlan Cox VM_NFREEORDER - 1); 7575c1f2cc4SAlan Cox n = 1 << order; 7585c1f2cc4SAlan Cox if (npages < n) 7595c1f2cc4SAlan Cox break; 7605c1f2cc4SAlan Cox vm_phys_free_pages(m, order); 7615c1f2cc4SAlan Cox m += n; 7625c1f2cc4SAlan Cox } 7635c1f2cc4SAlan Cox /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */ 7645c1f2cc4SAlan Cox for (; npages > 0; npages -= n) { 7655c1f2cc4SAlan Cox order = flsl(npages) - 1; 7665c1f2cc4SAlan Cox n = 1 << order; 7675c1f2cc4SAlan Cox vm_phys_free_pages(m, order); 7685c1f2cc4SAlan Cox m += n; 7695c1f2cc4SAlan Cox } 7705c1f2cc4SAlan Cox } 7715c1f2cc4SAlan Cox 7725c1f2cc4SAlan Cox /* 77311752d88SAlan Cox * Set the pool for a contiguous, power of two-sized set of physical pages. 
77411752d88SAlan Cox */ 7757bfda801SAlan Cox void 77611752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order) 77711752d88SAlan Cox { 77811752d88SAlan Cox vm_page_t m_tmp; 77911752d88SAlan Cox 78011752d88SAlan Cox for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 78111752d88SAlan Cox m_tmp->pool = pool; 78211752d88SAlan Cox } 78311752d88SAlan Cox 78411752d88SAlan Cox /* 7859742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 7869742373aSAlan Cox * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 7879742373aSAlan Cox * FALSE, indicating that "m" is not in the free lists. 7887bfda801SAlan Cox * 7897bfda801SAlan Cox * The free page queues must be locked. 7907bfda801SAlan Cox */ 791e35395ceSAlan Cox boolean_t 7927bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m) 7937bfda801SAlan Cox { 7947bfda801SAlan Cox struct vm_freelist *fl; 7957bfda801SAlan Cox struct vm_phys_seg *seg; 7967bfda801SAlan Cox vm_paddr_t pa, pa_half; 7977bfda801SAlan Cox vm_page_t m_set, m_tmp; 7987bfda801SAlan Cox int order; 7997bfda801SAlan Cox 8007bfda801SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 8017bfda801SAlan Cox 8027bfda801SAlan Cox /* 8037bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 8047bfda801SAlan Cox * physical pages containing the given physical page "m" and 8057bfda801SAlan Cox * assign it to "m_set". 
8067bfda801SAlan Cox */ 8077bfda801SAlan Cox seg = &vm_phys_segs[m->segind]; 8087bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 809bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 8107bfda801SAlan Cox order++; 8117bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 8122fbced65SAlan Cox if (pa >= seg->start) 8137bfda801SAlan Cox m_set = &seg->first_page[atop(pa - seg->start)]; 814e35395ceSAlan Cox else 815e35395ceSAlan Cox return (FALSE); 8167bfda801SAlan Cox } 817e35395ceSAlan Cox if (m_set->order < order) 818e35395ceSAlan Cox return (FALSE); 819e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 820e35395ceSAlan Cox return (FALSE); 8217bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 8227bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 8237bfda801SAlan Cox m_set, m_set->order)); 8247bfda801SAlan Cox 8257bfda801SAlan Cox /* 8267bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 8277bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 8287bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 8297bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 
8307bfda801SAlan Cox */ 8317bfda801SAlan Cox fl = (*seg->free_queues)[m_set->pool]; 8327bfda801SAlan Cox order = m_set->order; 8337e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 8347bfda801SAlan Cox while (order > 0) { 8357bfda801SAlan Cox order--; 8367bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 8377bfda801SAlan Cox if (m->phys_addr < pa_half) 8387bfda801SAlan Cox m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 8397bfda801SAlan Cox else { 8407bfda801SAlan Cox m_tmp = m_set; 8417bfda801SAlan Cox m_set = &seg->first_page[atop(pa_half - seg->start)]; 8427bfda801SAlan Cox } 8437e226537SAttilio Rao vm_freelist_add(fl, m_tmp, order, 0); 8447bfda801SAlan Cox } 8457bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 846e35395ceSAlan Cox return (TRUE); 8477bfda801SAlan Cox } 8487bfda801SAlan Cox 8497bfda801SAlan Cox /* 8507bfda801SAlan Cox * Try to zero one physical page. Used by an idle priority thread. 85111752d88SAlan Cox */ 85211752d88SAlan Cox boolean_t 85311752d88SAlan Cox vm_phys_zero_pages_idle(void) 85411752d88SAlan Cox { 8557e226537SAttilio Rao static struct vm_freelist *fl; 8567bfda801SAlan Cox static int flind, oind, pind; 85711752d88SAlan Cox vm_page_t m, m_tmp; 8587e226537SAttilio Rao int domain; 85911752d88SAlan Cox 8607e226537SAttilio Rao domain = vm_rr_selectdomain(); 8617e226537SAttilio Rao fl = vm_phys_free_queues[domain][0][0]; 86211752d88SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 8637bfda801SAlan Cox for (;;) { 864c325e866SKonstantin Belousov TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) { 8657bfda801SAlan Cox for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { 8667bfda801SAlan Cox if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { 8677bfda801SAlan Cox vm_phys_unfree_page(m_tmp); 868449c2e92SKonstantin Belousov vm_phys_freecnt_adj(m, -1); 86911752d88SAlan Cox mtx_unlock(&vm_page_queue_free_mtx); 87011752d88SAlan Cox pmap_zero_page_idle(m_tmp); 
87111752d88SAlan Cox m_tmp->flags |= PG_ZERO; 87211752d88SAlan Cox mtx_lock(&vm_page_queue_free_mtx); 873449c2e92SKonstantin Belousov vm_phys_freecnt_adj(m, 1); 8747bfda801SAlan Cox vm_phys_free_pages(m_tmp, 0); 8757bfda801SAlan Cox vm_page_zero_count++; 8767bfda801SAlan Cox cnt_prezero++; 87711752d88SAlan Cox return (TRUE); 87811752d88SAlan Cox } 87911752d88SAlan Cox } 88011752d88SAlan Cox } 8817bfda801SAlan Cox oind++; 8827bfda801SAlan Cox if (oind == VM_NFREEORDER) { 8837bfda801SAlan Cox oind = 0; 8847bfda801SAlan Cox pind++; 8857bfda801SAlan Cox if (pind == VM_NFREEPOOL) { 8867bfda801SAlan Cox pind = 0; 8877bfda801SAlan Cox flind++; 8887bfda801SAlan Cox if (flind == vm_nfreelists) 8897bfda801SAlan Cox flind = 0; 8907bfda801SAlan Cox } 8917e226537SAttilio Rao fl = vm_phys_free_queues[domain][flind][pind]; 8927bfda801SAlan Cox } 8937bfda801SAlan Cox } 89411752d88SAlan Cox } 89511752d88SAlan Cox 89611752d88SAlan Cox /* 8972f9f48d6SAlan Cox * Allocate a contiguous set of physical pages of the given size 8982f9f48d6SAlan Cox * "npages" from the free lists. All of the physical pages must be at 8992f9f48d6SAlan Cox * or above the given physical address "low" and below the given 9002f9f48d6SAlan Cox * physical address "high". The given value "alignment" determines the 9012f9f48d6SAlan Cox * alignment of the first physical page in the set. If the given value 9022f9f48d6SAlan Cox * "boundary" is non-zero, then the set of physical pages cannot cross 9032f9f48d6SAlan Cox * any physical address boundary that is a multiple of that value. Both 90411752d88SAlan Cox * "alignment" and "boundary" must be a power of two. 
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int dom, domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Try each memory domain in round-robin order. */
	dom = 0;
restartdom:
	domain = vm_rr_selectdomain();
	for (flind = 0; flind < vm_nfreelists; flind++) {
		/* Start at the smallest order that could hold npages. */
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = &vm_phys_free_queues[domain][flind][pind][0];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							/*
							 * Step by the largest
							 * block size; every
							 * block after the first
							 * must itself be a free
							 * maximum-order block.
							 */
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	if (++dom < vm_ndomains)
		goto restartdom;
	return (NULL);
done:
	/* Remove every constituent block of the run from the free lists. */
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
100911752d88SAlan Cox */ 101011752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages) 101111752d88SAlan Cox { 101211752d88SAlan Cox struct vm_freelist *fl; 10137e226537SAttilio Rao int flind, oind, pind, dom; 101411752d88SAlan Cox 10157e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 10167e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom); 101711752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 101811752d88SAlan Cox db_printf("FREE LIST %d:\n" 101911752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 102011752d88SAlan Cox "\n ", flind); 102111752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 102211752d88SAlan Cox db_printf(" | POOL %d", pind); 102311752d88SAlan Cox db_printf("\n-- "); 102411752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 102511752d88SAlan Cox db_printf("-- -- "); 102611752d88SAlan Cox db_printf("--\n"); 102711752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 102811752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind, 102911752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 103011752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 10317e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 103211752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt); 103311752d88SAlan Cox } 103411752d88SAlan Cox db_printf("\n"); 103511752d88SAlan Cox } 103611752d88SAlan Cox db_printf("\n"); 103711752d88SAlan Cox } 10387e226537SAttilio Rao db_printf("\n"); 10397e226537SAttilio Rao } 104011752d88SAlan Cox } 104111752d88SAlan Cox #endif 1042