/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
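 *
 * The allocator is a binary buddy system: free pages are kept on
 * per-(free list, pool, order) queues, where an order "n" entry is a
 * contiguous, naturally aligned block of 2^n pages.  For example, with
 * 4KB pages (PAGE_SHIFT == 12), an order 9 block spans 2MB.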
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

#define VM_PHYS_FICTITIOUS_NSEGS	8
static struct vm_phys_fictitious_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "", "");

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
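 * The result is available from userland as "sysctl vm.phys_segs".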
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
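 * Under VM_PHYSSEG_SPARSE, the segment's first vm_page is found by
 * counting the pages in all previously created segments; under
 * VM_PHYSSEG_DENSE, it is computed directly with PHYS_TO_VM_PAGE().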
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Initialize the physical memory allocator.
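 * Each (start, end) pair in phys_avail[] produces one or more segments:
 * a range is split where it crosses a free list cutoff (the 16MB ISA
 * DMA limit or VM_HIGHMEM_ADDRESS, when those lists are configured) and
 * at each memory domain boundary described by mem_affinity.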
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif

	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}

/*
 * Initialize a physical page and add it to the free lists.
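 * The page is freed as an order 0 block, so vm_phys_free_pages() can
 * coalesce it with any neighboring free buddies.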
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 466a3870a18SJohn Baldwin 467a3870a18SJohn Baldwin #if VM_NDOMAIN > 1 468a3870a18SJohn Baldwin domain = PCPU_GET(domain); 469a3870a18SJohn Baldwin #else 470a3870a18SJohn Baldwin domain = 0; 471a3870a18SJohn Baldwin #endif 47211752d88SAlan Cox mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 473a3870a18SJohn Baldwin fl = (*vm_phys_lookup_lists[domain][flind])[pool]; 47411752d88SAlan Cox for (oind = order; oind < VM_NFREEORDER; oind++) { 47511752d88SAlan Cox m = TAILQ_FIRST(&fl[oind].pl); 47611752d88SAlan Cox if (m != NULL) { 47711752d88SAlan Cox TAILQ_REMOVE(&fl[oind].pl, m, pageq); 47811752d88SAlan Cox fl[oind].lcnt--; 47911752d88SAlan Cox m->order = VM_NFREEORDER; 48011752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 48111752d88SAlan Cox return (m); 48211752d88SAlan Cox } 48311752d88SAlan Cox } 48411752d88SAlan Cox 48511752d88SAlan Cox /* 48611752d88SAlan Cox * The given pool was empty. Find the largest 48711752d88SAlan Cox * contiguous, power-of-two-sized set of pages in any 48811752d88SAlan Cox * pool. Transfer these pages to the given pool, and 48911752d88SAlan Cox * use them to satisfy the allocation. 49011752d88SAlan Cox */ 49111752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 49211752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 493a3870a18SJohn Baldwin alt = (*vm_phys_lookup_lists[domain][flind])[pind]; 49411752d88SAlan Cox m = TAILQ_FIRST(&alt[oind].pl); 49511752d88SAlan Cox if (m != NULL) { 49611752d88SAlan Cox TAILQ_REMOVE(&alt[oind].pl, m, pageq); 49711752d88SAlan Cox alt[oind].lcnt--; 49811752d88SAlan Cox m->order = VM_NFREEORDER; 49911752d88SAlan Cox vm_phys_set_pool(pool, m, oind); 50011752d88SAlan Cox vm_phys_split_pages(m, oind, fl, order); 50111752d88SAlan Cox return (m); 50211752d88SAlan Cox } 50311752d88SAlan Cox } 50411752d88SAlan Cox } 50511752d88SAlan Cox return (NULL); 50611752d88SAlan Cox } 50711752d88SAlan Cox 50811752d88SAlan Cox /* 50911752d88SAlan Cox * Find the vm_page corresponding to the given physical address. 
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t m;
	int segind;

	m = NULL;
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (pa >= seg->start && pa < seg->end) {
			m = &seg->first_page[atop(pa - seg->start)];
			KASSERT((m->flags & PG_FICTITIOUS) != 0,
			    ("%p not fictitious", m));
			break;
		}
	}
	return (m);
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		pmap_page_init(&fp[i]);
		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
	}

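	/*
	 * Claim the first unused slot in the fixed-size table of
	 * fictitious segments.  If every slot is already in use, free
	 * any malloc()ed page array and report EBUSY.
	 */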
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering not registered fictitious range"));
}

/*
 * Find the segment containing the given physical address.
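 * Unlike vm_phys_paddr_to_vm_page(), this function panics rather than
 * failing softly if "pa" lies outside every segment.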
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
			fl[order].lcnt--;
			m_buddy->order = VM_NFREEORDER;
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
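 *
 * For example, freeing 13 pages that begin on a 16 page boundary frees
 * an order 3 block, an order 2 block, and an order 0 block (8 + 4 + 1
 * pages).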
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
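	 * The search ascends through the orders: at each step, the low
	 * order bits of "m"'s physical address are masked off to find
	 * the page that would begin the enclosing block of the next
	 * larger size.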
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
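 *
 * The scan position (free list, pool, and order) is kept in static
 * variables, so successive calls resume where the previous call left
 * off rather than rescanning from the start.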
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
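 *
 * The free page queues must be locked.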
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
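					 *
					 * The boundary test XORs the first and
					 * last byte addresses of the run; a
					 * differing bit at or above the
					 * boundary bit would mean that the two
					 * bytes lie in different
					 * "boundary"-sized regions.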
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) &
					    ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif