/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists, starting with VM_NFREELIST.
 */
#define	VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)
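
/*
 * Illustrative layout (hypothetical values, not from any particular
 * config): with VM_NFREELIST = 2 and VM_NDOMAIN = 3, VM_RAW_NFREELIST
 * is 4.  Indices 0 and 1 are the regular free lists; indices 2 and 3
 * hold the VM_FREELIST_DEFAULT memory of domains 1 and 2, while domain
 * 0's default memory remains at index VM_FREELIST_DEFAULT.
 */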

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t start;
	vm_paddr_t end;
	vm_page_t first_page;
	int domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	char *cbuf;
	const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81;
	int error, flind, oind, pind;

	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6.6d",
				    fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	char *cbuf;
	const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81;
	int error, segind;

	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	char *cbuf;
	const int cbufsize = (vm_nfreelists + 1) * VM_NDOMAIN * 81;
	int domain, error, flind, ndomains;

	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}
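
/*
 * Worked example for the remapping above (hypothetical values): with
 * VM_NFREELIST = 2, a default-list segment in domain 2 has its flind
 * remapped from VM_FREELIST_DEFAULT to VM_NFREELIST + 1 = 3, and
 * vm_nfreelists grows to 4 so that later loops over the free lists
 * visit the new per-domain list.
 */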

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}
	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}
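
/*
 * Illustrative sketch (hypothetical affinity table): given the range
 * [0, 4GB) and mem_affinity entries {0, 2GB, domain 0} and
 * {2GB, 4GB, domain 1}, the loop above creates two segments, [0, 2GB)
 * in domain 0 and [2GB, 4GB) in domain 1, stopping once an entry's
 * end covers the range's end.
 */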

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}
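
/*
 * Worked example for the lookup lists above (hypothetical layout):
 * with two domains and VM_NFREELIST = 2, domain 0's search order at
 * the default slot is { &queues[VM_FREELIST_DEFAULT],
 * &queues[VM_NFREELIST] }, while domain 1's is
 * { &queues[VM_NFREELIST], &queues[VM_FREELIST_DEFAULT] }, so each
 * CPU tries its local domain's memory before falling back to the
 * other domain's.
 */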

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}
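
/*
 * Worked example: splitting an order-3 block (8 pages) down to an
 * order-0 request returns the buddies &m[4] at order 2, &m[2] at
 * order 1, and &m[1] at order 0 to "fl", leaving the caller with the
 * single page "m".  Each iteration halves the block and frees the
 * upper half.
 */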

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
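
/*
 * Caller-side sketch (illustrative, not part of this file): with the
 * free queue mutex held, a request for a run of four pages uses
 * order 2:
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 2);
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * A NULL return means no free list could satisfy the order, even
 * after the cross-pool fallback above.
 */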

/*
 * Allocate physical memory from phys_avail[].
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i];
		phys_avail[i] += size;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}
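
/*
 * Worked example: with 4KB pages and a segment starting at
 * 0x40000000, pa = 0x40005000 gives atop(pa - seg->start) = 5, so the
 * lookups above resolve pa to &seg->first_page[5].
 */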

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}
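
/*
 * Worked example of the coalescing above: with PAGE_SHIFT = 12,
 * freeing the order-0 page at pa = 0x5000 computes pa_buddy =
 * 0x5000 ^ 0x1000 = 0x4000.  If that buddy is free at order 0, the
 * pair merges into an order-1 block at pa & ~0x1fff = 0x4000, and the
 * loop retries at the next higher order.
 */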

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise,
 * return FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
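
/*
 * Worked example: extracting the page at 0x6000 from a free order-2
 * block at 0x4000 first returns the half not containing the page,
 * the order-1 block covering pages 0x4000 and 0x5000, to the free
 * lists, then returns page 0x7000 at order 0, leaving exactly the
 * requested page removed.
 */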

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
		    oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical
					 * pages from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation
					 * request larger than the largest
					 * block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient
						 * number of subsequent blocks
						 * to satisfy the allocation
						 * request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT +
							    VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[
							    atop(pa - seg->start)];
							if (m->order !=
							    VM_NFREEORDER - 1)
								break;
						}
						/*
						 * If not, continue to the
						 * next block.
						 */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within
					 * the given range, satisfy the given
					 * alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) &
					    ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}
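
/*
 * Worked example of the range test above (illustrative values): for a
 * four-page (16KB) request with alignment = 0x2000 and boundary =
 * 0x10000, a candidate at pa = 0x1e000 passes the alignment test
 * (0x1e000 & 0x1fff == 0) but fails the boundary test, since
 * (0x1e000 ^ 0x21fff) & ~0xffff == 0x30000, i.e. the run would
 * straddle the 64KB line at 0x20000.  A candidate at pa = 0x1c000
 * passes both tests.
 */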

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif