xref: /freebsd/sys/vm/vm_phys.c (revision 00f0e671ff895f4cae35d3b53108ca2ddec0a526)
/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

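/*
 * Worked example (illustrative, not part of the original file): the
 * default free list for domain "d" (d > 0) lives at the raw index
 * VM_NFREELIST + (d - 1), as computed in _vm_phys_create_seg() below.
 */
#if 0
static __inline int
example_default_flind(int domain)
{

	return (domain == 0 ? VM_FREELIST_DEFAULT :
	    VM_NFREELIST + (domain - 1));
}
#endif
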
struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
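
/*
 * Usage note (an assumption about typical use, not from this file): the
 * table produced above is read from userland with sysctl(8), e.g.
 * "sysctl vm.phys_free".
 */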

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}
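
/*
 * Worked example (hypothetical values): if mem_affinity describes
 * [0, 4G) -> domain 0 and [4G, 8G) -> domain 1, then
 * vm_phys_create_seg(3G, 5G, flind) creates two segments:
 * [3G, 4G) in domain 0 and [4G, 5G) in domain 1.
 */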

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	     flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}
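
/*
 * Worked example (illustrative): splitting an order-2 block "m" down to
 * order 0 frees its upper halves, &m[2] at order 1 and then &m[1] at
 * order 0, leaving the order-0 page "m" for the caller.
 */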

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
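
/*
 * Caller sketch (an assumption-laden example, not code from this file):
 * a single default-pool page is allocated with the free page queues
 * locked, as vm_phys_alloc_pages() requires.
 */
#if 0
static vm_page_t
example_alloc_one_page(void)
{
	vm_page_t m;

	mtx_lock(&vm_page_queue_free_mtx);
	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
	return (m);		/* NULL if no memory was available. */
}
#endif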

/*
 * Allocate physical memory from phys_avail[].
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i];
		phys_avail[i] += size;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}
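
/*
 * Worked example (hypothetical addresses, assuming 4 KB pages, i.e.
 * PAGE_SHIFT == 12): at order 1 the buddy of the block at 0x10000 is
 * 0x10000 ^ (1 << (12 + 1)) == 0x12000, and the merged order-2 block
 * starts at 0x10000 & ~((1 << (12 + 2)) - 1) == 0x10000.
 */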

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
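
/*
 * Worked example (hypothetical, assuming 4 KB pages): extracting the
 * page at 0x13000 from a free order-2 block at 0x10000 returns the
 * halves that do not contain it, [0x10000, 0x12000) at order 1 and
 * then the page at 0x12000 at order 0, leaving only 0x13000 removed
 * from the free lists.
 */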

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}
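
/*
 * Caller sketch (illustrative parameters, not from this file): allocate
 * 16 contiguous pages below 4 GB, aligned to 64 KB, without crossing a
 * 1 MB boundary.
 */
#if 0
static vm_page_t
example_alloc_contig(void)
{

	return (vm_phys_alloc_contig(16, 0, 0xffffffffUL, 64 * 1024,
	    1024 * 1024));
}
#endif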

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif
89611752d88SAlan Cox #endif
897