/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

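/*
 * For example (hypothetical values): with VM_NFREELIST == 3 and
 * VM_NDOMAIN == 2, VM_RAW_NFREELIST is 4; domain 0's default pages
 * remain on VM_FREELIST_DEFAULT, while domain 1's default pages are
 * kept on the extra list at index VM_NFREELIST + (1 - 1) == 3.
 */
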
struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
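
/*
 * For each domain, vm_phys_lookup_lists[domain] ranks all of the raw
 * free queues in the order in which a CPU in that domain should
 * search them; the per-domain default lists are ordered so that
 * local memory is preferred over remote memory (see vm_phys_init()).
 */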

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	char *cbuf;
	const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81;
	int error, flind, oind, pind;

	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6.6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}
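
/*
 * The table built above is readable from userland, e.g. (illustrative
 * invocation):
 *
 *	$ sysctl vm.phys_free
 */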

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	char *cbuf;
	const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81;
	int error, segind;

	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	char *cbuf;
	const int cbufsize = (vm_nfreelists + 1) * VM_NDOMAIN * 81;
	int domain, error, flind, ndomains;

	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}
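
/*
 * Worked example (hypothetical affinity table): given the entries
 * { 0x0, 0x40000000, domain 0 } and { 0x40000000, 0x80000000,
 * domain 1 }, creating the segment [0x30000000, 0x50000000) yields
 * [0x30000000, 0x40000000) in domain 0 and [0x40000000, 0x50000000)
 * in domain 1.
 */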

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	     flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}
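
/*
 * Example of the resulting lookup order (hypothetical two-domain
 * configuration, ndomains == 2): domain 0 searches the default lists
 * as { VM_FREELIST_DEFAULT, VM_NFREELIST } while domain 1 searches
 * { VM_NFREELIST, VM_FREELIST_DEFAULT }, so each CPU tries memory
 * from its own domain before falling back to the other one.
 */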

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}
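
/*
 * For example, splitting an order 3 block at "m" down to order 1
 * releases the upper half &m[4] as an order 2 block, then &m[2] as
 * an order 1 block, leaving the order 1 block at "m" for the caller.
 */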

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
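
/*
 * Typical use (sketch): with the free page queue lock held, a caller
 * requests a single default-pool page via
 * vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0); NULL comes back only
 * after every free list reachable from the caller's domain has been
 * searched across all pools and orders.
 */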

/*
 * Allocate physical memory from phys_avail[].
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i];
		phys_avail[i] += size;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}
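
/*
 * Worked example (assuming 4KB pages, i.e. PAGE_SHIFT == 12): freeing
 * the order 0 page at 0x3000 gives pa_buddy = 0x3000 ^ 0x1000 =
 * 0x2000; if that page is a free order 0 page, the pair coalesces
 * into an order 1 block based at 0x2000, whose own buddy is then
 * 0x2000 ^ 0x2000 = 0x0, and so on up the orders.
 */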

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
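
/*
 * Worked example (PAGE_SHIFT == 12 assumed): extracting the page at
 * 0x3000 from a free order 2 block based at 0x0 first returns
 * [0x0, 0x2000) to the free lists as an order 1 block, then the page
 * at 0x2000 as an order 0 block, leaving only 0x3000 dequeued.
 */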

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}
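
/*
 * Note that the cursor (fl, flind, oind, pind) is static on purpose:
 * successive calls from the idle thread resume scanning where the
 * previous call left off rather than restarting from the first queue.
 */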

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}
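
/*
 * Example of the range checks above (hypothetical request, 4KB
 * pages): npages == 4, alignment == 0x4000, and boundary == 0x10000
 * accept a candidate block at pa == 0x8000, since 0x8000 is
 * 0x4000-aligned and (0x8000 ^ 0xbfff) & ~0xffff == 0, i.e. the run
 * [0x8000, 0xc000) does not cross a 64KB boundary.
 */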

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
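
/*
 * Invoked from the kernel debugger, e.g.:
 *
 *	db> show freepages
 */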
#endif