xref: /freebsd/sys/vm/vm_phys.c (revision 174b5f385023a583e4a89f1e213ab586d332065a)
/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)
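/*
 * For example, with VM_NFREELIST == 3 and VM_NDOMAIN == 2,
 * VM_RAW_NFREELIST is 4: domain 0's default-list pages stay on the
 * list at index VM_FREELIST_DEFAULT, while domain 1's are kept on the
 * extra list at index VM_NFREELIST.
 */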

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

#define VM_PHYS_FICTITIOUS_NSEGS	8
static struct vm_phys_fictitious_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
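	/*
	 * Pages on a non-default domain's default freelist are kept on
	 * one of the extra per-domain lists appended after VM_NFREELIST;
	 * grow vm_nfreelists to cover that list the first time the
	 * domain is seen.
	 */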
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

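	/*
	 * Walk the affinity table, splitting [start, end) at domain
	 * boundaries so that each segment created below lies entirely
	 * within a single domain.
	 */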
	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
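	/*
	 * For example, with ndomains == 2, domain 0's lookup list visits
	 * its own default queue and then domain 1's (stored at index
	 * VM_NFREELIST), while domain 1's lookup list visits them in the
	 * opposite order; the lists before and after VM_FREELIST_DEFAULT
	 * are shared by both domains.
	 */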
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	     flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif

	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
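/*
 * For example, splitting an order-3 block down to order 0 returns its
 * order-2, order-1, and order-0 upper halves (the "buddies") to the
 * free lists and leaves the order-0 block at "m" for the caller.
 */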
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page from the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

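/*
 * Find the vm_page corresponding to the given physical address within a
 * registered fictitious range, or NULL if the address is not covered.
 */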
vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t m;
	int segind;

	m = NULL;
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (pa >= seg->start && pa < seg->end) {
			m = &seg->first_page[atop(pa - seg->start)];
			KASSERT((m->flags & PG_FICTITIOUS) != 0,
			    ("%p not fictitious", m));
			break;
		}
	}
	return (m);
}

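/*
 * Register the physical range [start, end) as fictitious pages with the
 * given memory attribute.  The pages are backed by vm_page_array when
 * the range falls within it (VM_PHYSSEG_DENSE) and by a malloc()ed
 * array otherwise.  Returns 0 on success and EBUSY when no segment slot
 * is available.
 */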
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		pmap_page_init(&fp[i]);
		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
	}
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}

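/*
 * Unregister a fictitious range previously registered with
 * vm_phys_fictitious_reg_range(), freeing the page array if it was
 * malloc()ed.
 */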
void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering a fictitious range that was never registered"));
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
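	/*
	 * Coalesce with the buddy block while possible: the buddy's
	 * address differs from this block's only in bit
	 * (PAGE_SHIFT + order), hence the XOR below.
	 */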
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
			fl[order].lcnt--;
			m_buddy->order = VM_NFREEORDER;
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
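	/*
	 * For example, freeing 10 pages starting at page frame 4 frees an
	 * order-2 block at frame 4 (the frame's alignment permits no
	 * more); the second loop below then frees an order-2 block at
	 * frame 8 and an order-1 block at frame 12.
	 */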
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.
 * "alignment" and "boundary" must each be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
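					/*
					 * The XOR below is nonzero iff the
					 * first and last bytes of the run
					 * fall in different boundary-sized
					 * windows, i.e., the run would
					 * cross a boundary.
					 */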
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif