xref: /freebsd/sys/vm/vm_phys.c (revision 449c2e92c99cffdc00ebe516479f36154bf2c54c)
111752d88SAlan Cox /*-
211752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
311752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
411752d88SAlan Cox  * All rights reserved.
511752d88SAlan Cox  *
611752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
711752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
811752d88SAlan Cox  *
911752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1011752d88SAlan Cox  * modification, are permitted provided that the following conditions
1111752d88SAlan Cox  * are met:
1211752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1311752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1411752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1611752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1711752d88SAlan Cox  *
1811752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1911752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2011752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2111752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2211752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2311752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2411752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2511752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2611752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2711752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
2811752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2911752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3011752d88SAlan Cox  */
3111752d88SAlan Cox 
32fbd80bd0SAlan Cox /*
33fbd80bd0SAlan Cox  *	Physical memory system implementation
34fbd80bd0SAlan Cox  *
35fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
36fbd80bd0SAlan Cox  * virtual memory system.
37fbd80bd0SAlan Cox  */
38fbd80bd0SAlan Cox 
3911752d88SAlan Cox #include <sys/cdefs.h>
4011752d88SAlan Cox __FBSDID("$FreeBSD$");
4111752d88SAlan Cox 
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
4711752d88SAlan Cox #include <sys/lock.h>
4811752d88SAlan Cox #include <sys/kernel.h>
4911752d88SAlan Cox #include <sys/malloc.h>
5011752d88SAlan Cox #include <sys/mutex.h>
517e226537SAttilio Rao #if MAXMEMDOM > 1
527e226537SAttilio Rao #include <sys/proc.h>
537e226537SAttilio Rao #endif
5411752d88SAlan Cox #include <sys/queue.h>
5511752d88SAlan Cox #include <sys/sbuf.h>
5611752d88SAlan Cox #include <sys/sysctl.h>
5711752d88SAlan Cox #include <sys/vmmeter.h>
5811752d88SAlan Cox 
5911752d88SAlan Cox #include <ddb/ddb.h>
6011752d88SAlan Cox 
6111752d88SAlan Cox #include <vm/vm.h>
6211752d88SAlan Cox #include <vm/vm_param.h>
6311752d88SAlan Cox #include <vm/vm_kern.h>
6411752d88SAlan Cox #include <vm/vm_object.h>
6511752d88SAlan Cox #include <vm/vm_page.h>
6611752d88SAlan Cox #include <vm/vm_phys.h>
6711752d88SAlan Cox 
68*449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
69*449c2e92SKonstantin Belousov     "Too many physsegs.");
7011752d88SAlan Cox 
71a3870a18SJohn Baldwin struct mem_affinity *mem_affinity;
72a3870a18SJohn Baldwin 
737e226537SAttilio Rao int vm_ndomains = 1;
747e226537SAttilio Rao 
75*449c2e92SKonstantin Belousov struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
76*449c2e92SKonstantin Belousov int vm_phys_nsegs;
7711752d88SAlan Cox 
78b6de32bdSKonstantin Belousov #define VM_PHYS_FICTITIOUS_NSEGS	8
79b6de32bdSKonstantin Belousov static struct vm_phys_fictitious_seg {
80b6de32bdSKonstantin Belousov 	vm_paddr_t	start;
81b6de32bdSKonstantin Belousov 	vm_paddr_t	end;
82b6de32bdSKonstantin Belousov 	vm_page_t	first_page;
83b6de32bdSKonstantin Belousov } vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
84b6de32bdSKonstantin Belousov static struct mtx vm_phys_fictitious_reg_mtx;
85c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
86b6de32bdSKonstantin Belousov 
8711752d88SAlan Cox static struct vm_freelist
887e226537SAttilio Rao     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
8911752d88SAlan Cox 
9011752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
9111752d88SAlan Cox 
9211752d88SAlan Cox static int cnt_prezero;
9311752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
9411752d88SAlan Cox     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
9511752d88SAlan Cox 
9611752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
9711752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
9811752d88SAlan Cox     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
9911752d88SAlan Cox 
10011752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
10111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
10211752d88SAlan Cox     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
10311752d88SAlan Cox 
1047e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
1057e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
106a3870a18SJohn Baldwin 
107f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
108f5c4b077SJohn Baldwin     int order);
109a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
110a3870a18SJohn Baldwin     int domain);
11111752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
11211752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa);
11311752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
11411752d88SAlan Cox     int order);
11511752d88SAlan Cox 
/*
 * Select the memory domain for the next allocation, round-robin over
 * all domains with the position tracked per thread.  With a single
 * domain configured (MAXMEMDOM <= 1) this is always domain 0.
 */
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;
	/* Advance this thread's cursor and wrap it at vm_ndomains. */
	td->td_dom_rr_idx = (td->td_dom_rr_idx + 1) % vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
1317e226537SAttilio Rao 
132*449c2e92SKonstantin Belousov boolean_t
133*449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
134*449c2e92SKonstantin Belousov {
135*449c2e92SKonstantin Belousov 	struct vm_phys_seg *s;
136*449c2e92SKonstantin Belousov 	int idx;
137*449c2e92SKonstantin Belousov 
138*449c2e92SKonstantin Belousov 	while ((idx = ffsl(mask)) != 0) {
139*449c2e92SKonstantin Belousov 		idx--;	/* ffsl counts from 1 */
140*449c2e92SKonstantin Belousov 		mask &= ~(1UL << idx);
141*449c2e92SKonstantin Belousov 		s = &vm_phys_segs[idx];
142*449c2e92SKonstantin Belousov 		if (low < s->end && high > s->start)
143*449c2e92SKonstantin Belousov 			return (TRUE);
144*449c2e92SKonstantin Belousov 	}
145*449c2e92SKonstantin Belousov 	return (FALSE);
146*449c2e92SKonstantin Belousov }
147*449c2e92SKonstantin Belousov 
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 *
 * Handler for the vm.phys_free sysctl.  For every domain and free
 * list, prints a table with one row per buddy order and one column
 * per pool, showing the count of free blocks at that (order, pool).
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	/* Wire the request's old buffer before formatting output. */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			/* Table header: one column per pool. */
			sbuf_printf(&sbuf, "FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			/* One row per order, largest first. */
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6.6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
			sbuf_printf(&sbuf, "\n");
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
19311752d88SAlan Cox 
/*
 * Outputs the set of physical memory segments.
 *
 * Handler for the vm.phys_segs sysctl: for each segment in
 * vm_phys_segs prints its bounds, NUMA domain, and the address of its
 * free queues into an sbuf that is returned to userland.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	/* Wire the request's old buffer before formatting output. */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
22211752d88SAlan Cox 
2237e226537SAttilio Rao static void
2247e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
225a3870a18SJohn Baldwin {
226a3870a18SJohn Baldwin 
2277e226537SAttilio Rao 	m->order = order;
2287e226537SAttilio Rao 	if (tail)
2297e226537SAttilio Rao 		TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
2307e226537SAttilio Rao 	else
2317e226537SAttilio Rao 		TAILQ_INSERT_HEAD(&fl[order].pl, m, pageq);
2327e226537SAttilio Rao 	fl[order].lcnt++;
233a3870a18SJohn Baldwin }
2347e226537SAttilio Rao 
/*
 * Remove the free block m of the given order from free list fl.
 * Setting m->order to VM_NFREEORDER marks m as no longer free.
 * NOTE(review): callers appear to hold vm_page_queue_free_mtx (the
 * allocator asserts it elsewhere in this file) — confirm for new call
 * sites.
 */
static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, pageq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}
243a3870a18SJohn Baldwin 
/*
 * Create a physical memory segment covering [start, end) that belongs
 * to the given NUMA domain and uses the free queues selected by flind.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	/*
	 * In the sparse layout each segment's pages occupy a consecutive
	 * run of vm_page_array; sum the pages of all existing segments
	 * to find where this segment's run begins.
	 */
	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	/* Dense layout: vm_page_array is indexed directly by frame. */
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
	seg->free_queues = &vm_phys_free_queues[domain][flind];
}
27611752d88SAlan Cox 
277a3870a18SJohn Baldwin static void
278a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
279a3870a18SJohn Baldwin {
280a3870a18SJohn Baldwin 	int i;
281a3870a18SJohn Baldwin 
282a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
283a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, end, flind, 0);
284a3870a18SJohn Baldwin 		return;
285a3870a18SJohn Baldwin 	}
286a3870a18SJohn Baldwin 
287a3870a18SJohn Baldwin 	for (i = 0;; i++) {
288a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
289a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
290a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
291a3870a18SJohn Baldwin 			continue;
292a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
293a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
294a3870a18SJohn Baldwin 			    (uintmax_t)start);
295a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
296a3870a18SJohn Baldwin 			_vm_phys_create_seg(start, end, flind,
297a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
298a3870a18SJohn Baldwin 			break;
299a3870a18SJohn Baldwin 		}
300a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
301a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
302a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
303a3870a18SJohn Baldwin 	}
304a3870a18SJohn Baldwin }
305a3870a18SJohn Baldwin 
/*
 * Initialize the physical memory allocator.
 *
 * Walks phys_avail[], carving the usable physical memory into segments
 * assigned to the appropriate free list (ISA DMA below 16MB and/or
 * high memory above VM_HIGHMEM_ADDRESS when the platform defines those
 * lists, otherwise the default list), then initializes every
 * (domain, freelist, pool, order) queue and the fictitious-range lock.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int dom, flind, i, oind, pind;

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		/* Memory below 16MB (ISA DMA) gets its own free list;
		 * a range straddling the boundary is split in two. */
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		/* Likewise, memory above VM_HIGHMEM_ADDRESS goes on the
		 * high-memory free list, splitting straddling ranges. */
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	/* Start every per-domain, per-list, per-pool, per-order queue empty. */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}
	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}
36011752d88SAlan Cox 
36111752d88SAlan Cox /*
36211752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
36311752d88SAlan Cox  */
36411752d88SAlan Cox static __inline void
36511752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
36611752d88SAlan Cox {
36711752d88SAlan Cox 	vm_page_t m_buddy;
36811752d88SAlan Cox 
36911752d88SAlan Cox 	while (oind > order) {
37011752d88SAlan Cox 		oind--;
37111752d88SAlan Cox 		m_buddy = &m[1 << oind];
37211752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
37311752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
37411752d88SAlan Cox 		    m_buddy, m_buddy->order));
3757e226537SAttilio Rao 		vm_freelist_add(fl, m_buddy, oind, 0);
37611752d88SAlan Cox         }
37711752d88SAlan Cox }
37811752d88SAlan Cox 
/*
 * Initialize a physical page and add it to the free lists.
 *
 * Bumps the global and per-domain page counts, records the page's
 * segment, marks it free, and releases it to the buddy allocator as an
 * order-0 block under the free page queue lock.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	/* Note that this domain owns pages in the page's segment. */
	vmd->vmd_segs |= 1UL << m->segind;
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}
40711752d88SAlan Cox 
40811752d88SAlan Cox /*
40911752d88SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages
41011752d88SAlan Cox  * from the free lists.
4118941dc44SAlan Cox  *
4128941dc44SAlan Cox  * The free page queues must be locked.
41311752d88SAlan Cox  */
41411752d88SAlan Cox vm_page_t
41511752d88SAlan Cox vm_phys_alloc_pages(int pool, int order)
41611752d88SAlan Cox {
41749ca10d4SJayachandran C. 	vm_page_t m;
4187e226537SAttilio Rao 	int dom, domain, flind;
41949ca10d4SJayachandran C. 
420f5c4b077SJohn Baldwin 	KASSERT(pool < VM_NFREEPOOL,
421f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
422f5c4b077SJohn Baldwin 	KASSERT(order < VM_NFREEORDER,
423f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: order %d is out of range", order));
424f5c4b077SJohn Baldwin 
4257e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4267e226537SAttilio Rao 		domain = vm_rr_selectdomain();
42749ca10d4SJayachandran C. 		for (flind = 0; flind < vm_nfreelists; flind++) {
4287e226537SAttilio Rao 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
4297e226537SAttilio Rao 			    order);
43049ca10d4SJayachandran C. 			if (m != NULL)
43149ca10d4SJayachandran C. 				return (m);
43249ca10d4SJayachandran C. 		}
4337e226537SAttilio Rao 	}
43449ca10d4SJayachandran C. 	return (NULL);
43549ca10d4SJayachandran C. }
43649ca10d4SJayachandran C. 
43749ca10d4SJayachandran C. /*
43849ca10d4SJayachandran C.  * Find and dequeue a free page on the given free list, with the
43949ca10d4SJayachandran C.  * specified pool and order
44049ca10d4SJayachandran C.  */
44149ca10d4SJayachandran C. vm_page_t
44249ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order)
44349ca10d4SJayachandran C. {
44411752d88SAlan Cox 	vm_page_t m;
4457e226537SAttilio Rao 	int dom, domain;
44611752d88SAlan Cox 
44749ca10d4SJayachandran C. 	KASSERT(flind < VM_NFREELIST,
44849ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
44911752d88SAlan Cox 	KASSERT(pool < VM_NFREEPOOL,
45049ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
45111752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
45249ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
453a3870a18SJohn Baldwin 
4547e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4557e226537SAttilio Rao 		domain = vm_rr_selectdomain();
4567e226537SAttilio Rao 		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
457f5c4b077SJohn Baldwin 		if (m != NULL)
458f5c4b077SJohn Baldwin 			return (m);
4597e226537SAttilio Rao 	}
4607e226537SAttilio Rao 	return (NULL);
461f5c4b077SJohn Baldwin }
462f5c4b077SJohn Baldwin 
/*
 * Allocate a contiguous, power-of-two-sized set of physical pages of
 * the given order from the specified domain and free list, preferring
 * the specified pool.  If the preferred pool cannot satisfy the
 * request, the largest suitable block from any other pool on the same
 * free list is converted to the preferred pool and used.  Returns NULL
 * when no block of sufficient order exists.
 *
 * The free page queue lock must be held.
 */
static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	/* First search the preferred pool, smallest adequate order up. */
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* Return the unused remainder to the free lists. */
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
50211752d88SAlan Cox 
50311752d88SAlan Cox /*
50411752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
50511752d88SAlan Cox  */
50611752d88SAlan Cox vm_page_t
50711752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
50811752d88SAlan Cox {
50911752d88SAlan Cox 	struct vm_phys_seg *seg;
51011752d88SAlan Cox 	int segind;
51111752d88SAlan Cox 
51211752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
51311752d88SAlan Cox 		seg = &vm_phys_segs[segind];
51411752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
51511752d88SAlan Cox 			return (&seg->first_page[atop(pa - seg->start)]);
51611752d88SAlan Cox 	}
517f06a3a36SAndrew Thompson 	return (NULL);
51811752d88SAlan Cox }
51911752d88SAlan Cox 
520b6de32bdSKonstantin Belousov vm_page_t
521b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
522b6de32bdSKonstantin Belousov {
523b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
524b6de32bdSKonstantin Belousov 	vm_page_t m;
525b6de32bdSKonstantin Belousov 	int segind;
526b6de32bdSKonstantin Belousov 
527b6de32bdSKonstantin Belousov 	m = NULL;
528b6de32bdSKonstantin Belousov 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
529b6de32bdSKonstantin Belousov 		seg = &vm_phys_fictitious_segs[segind];
530b6de32bdSKonstantin Belousov 		if (pa >= seg->start && pa < seg->end) {
531b6de32bdSKonstantin Belousov 			m = &seg->first_page[atop(pa - seg->start)];
532b6de32bdSKonstantin Belousov 			KASSERT((m->flags & PG_FICTITIOUS) != 0,
533b6de32bdSKonstantin Belousov 			    ("%p not fictitious", m));
534b6de32bdSKonstantin Belousov 			break;
535b6de32bdSKonstantin Belousov 		}
536b6de32bdSKonstantin Belousov 	}
537b6de32bdSKonstantin Belousov 	return (m);
538b6de32bdSKonstantin Belousov }
539b6de32bdSKonstantin Belousov 
/*
 * Register the physical address range [start, end) as fictitious pages
 * with the given memory attribute, so that
 * vm_phys_fictitious_to_vm_page() can translate addresses within it.
 *
 * In the dense layout, if the range falls inside vm_page_array the
 * existing page structures are reused; otherwise (and always in other
 * layouts) an array of fake pages is allocated.  Returns 0 on success
 * or EBUSY when all VM_PHYS_FICTITIOUS_NSEGS slots are taken.
 */
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;	/* tracks whether fp must be freed on failure */
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		/* Range is covered by vm_page_array; reuse those pages. */
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		/* Fictitious pages are neither busy nor unmanaged here. */
		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
	}
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	/* Claim the first unused slot (start == end == 0). */
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	/* No free slot: release the page array if we allocated it. */
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}
591b6de32bdSKonstantin Belousov 
/*
 * Unregister a fictitious range previously registered with
 * vm_phys_fictitious_reg_range().  [start, end) must exactly match a
 * registered segment; otherwise the function asserts (under
 * INVARIANTS).  Frees the page array unless it was carved out of
 * vm_page_array in the dense layout.
 */
void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			/* Mark the slot free before dropping the lock. */
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			/* Only malloc'ed arrays are freed (see reg_range). */
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering not registered fictitious range"));
}
624b6de32bdSKonstantin Belousov 
62511752d88SAlan Cox /*
62611752d88SAlan Cox  * Find the segment containing the given physical address.
62711752d88SAlan Cox  */
62811752d88SAlan Cox static int
62911752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa)
63011752d88SAlan Cox {
63111752d88SAlan Cox 	struct vm_phys_seg *seg;
63211752d88SAlan Cox 	int segind;
63311752d88SAlan Cox 
63411752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
63511752d88SAlan Cox 		seg = &vm_phys_segs[segind];
63611752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
63711752d88SAlan Cox 			return (segind);
63811752d88SAlan Cox 	}
63911752d88SAlan Cox 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
64011752d88SAlan Cox 	    (uintmax_t)pa);
64111752d88SAlan Cox }
64211752d88SAlan Cox 
64311752d88SAlan Cox /*
64411752d88SAlan Cox  * Free a contiguous, power of two-sized set of physical pages.
6458941dc44SAlan Cox  *
6468941dc44SAlan Cox  * The free page queues must be locked.
64711752d88SAlan Cox  */
64811752d88SAlan Cox void
64911752d88SAlan Cox vm_phys_free_pages(vm_page_t m, int order)
65011752d88SAlan Cox {
65111752d88SAlan Cox 	struct vm_freelist *fl;
65211752d88SAlan Cox 	struct vm_phys_seg *seg;
6535c1f2cc4SAlan Cox 	vm_paddr_t pa;
65411752d88SAlan Cox 	vm_page_t m_buddy;
65511752d88SAlan Cox 
65611752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
6578941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected order %d",
65811752d88SAlan Cox 	    m, m->order));
65911752d88SAlan Cox 	KASSERT(m->pool < VM_NFREEPOOL,
6608941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
66111752d88SAlan Cox 	    m, m->pool));
66211752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
6638941dc44SAlan Cox 	    ("vm_phys_free_pages: order %d is out of range", order));
66411752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
66511752d88SAlan Cox 	seg = &vm_phys_segs[m->segind];
6665c1f2cc4SAlan Cox 	if (order < VM_NFREEORDER - 1) {
6675c1f2cc4SAlan Cox 		pa = VM_PAGE_TO_PHYS(m);
6685c1f2cc4SAlan Cox 		do {
6695c1f2cc4SAlan Cox 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
6705c1f2cc4SAlan Cox 			if (pa < seg->start || pa >= seg->end)
67111752d88SAlan Cox 				break;
6725c1f2cc4SAlan Cox 			m_buddy = &seg->first_page[atop(pa - seg->start)];
67311752d88SAlan Cox 			if (m_buddy->order != order)
67411752d88SAlan Cox 				break;
67511752d88SAlan Cox 			fl = (*seg->free_queues)[m_buddy->pool];
6767e226537SAttilio Rao 			vm_freelist_rem(fl, m_buddy, order);
67711752d88SAlan Cox 			if (m_buddy->pool != m->pool)
67811752d88SAlan Cox 				vm_phys_set_pool(m->pool, m_buddy, order);
67911752d88SAlan Cox 			order++;
6805c1f2cc4SAlan Cox 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
68111752d88SAlan Cox 			m = &seg->first_page[atop(pa - seg->start)];
6825c1f2cc4SAlan Cox 		} while (order < VM_NFREEORDER - 1);
68311752d88SAlan Cox 	}
68411752d88SAlan Cox 	fl = (*seg->free_queues)[m->pool];
6857e226537SAttilio Rao 	vm_freelist_add(fl, m, order, 1);
68611752d88SAlan Cox }
68711752d88SAlan Cox 
68811752d88SAlan Cox /*
6895c1f2cc4SAlan Cox  * Free a contiguous, arbitrarily sized set of physical pages.
6905c1f2cc4SAlan Cox  *
6915c1f2cc4SAlan Cox  * The free page queues must be locked.
6925c1f2cc4SAlan Cox  */
6935c1f2cc4SAlan Cox void
6945c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages)
6955c1f2cc4SAlan Cox {
6965c1f2cc4SAlan Cox 	u_int n;
6975c1f2cc4SAlan Cox 	int order;
6985c1f2cc4SAlan Cox 
6995c1f2cc4SAlan Cox 	/*
7005c1f2cc4SAlan Cox 	 * Avoid unnecessary coalescing by freeing the pages in the largest
7015c1f2cc4SAlan Cox 	 * possible power-of-two-sized subsets.
7025c1f2cc4SAlan Cox 	 */
7035c1f2cc4SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7045c1f2cc4SAlan Cox 	for (;; npages -= n) {
7055c1f2cc4SAlan Cox 		/*
7065c1f2cc4SAlan Cox 		 * Unsigned "min" is used here so that "order" is assigned
7075c1f2cc4SAlan Cox 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
7085c1f2cc4SAlan Cox 		 * or the low-order bits of its physical address are zero
7095c1f2cc4SAlan Cox 		 * because the size of a physical address exceeds the size of
7105c1f2cc4SAlan Cox 		 * a long.
7115c1f2cc4SAlan Cox 		 */
7125c1f2cc4SAlan Cox 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
7135c1f2cc4SAlan Cox 		    VM_NFREEORDER - 1);
7145c1f2cc4SAlan Cox 		n = 1 << order;
7155c1f2cc4SAlan Cox 		if (npages < n)
7165c1f2cc4SAlan Cox 			break;
7175c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7185c1f2cc4SAlan Cox 		m += n;
7195c1f2cc4SAlan Cox 	}
7205c1f2cc4SAlan Cox 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
7215c1f2cc4SAlan Cox 	for (; npages > 0; npages -= n) {
7225c1f2cc4SAlan Cox 		order = flsl(npages) - 1;
7235c1f2cc4SAlan Cox 		n = 1 << order;
7245c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7255c1f2cc4SAlan Cox 		m += n;
7265c1f2cc4SAlan Cox 	}
7275c1f2cc4SAlan Cox }
7285c1f2cc4SAlan Cox 
7295c1f2cc4SAlan Cox /*
73011752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
73111752d88SAlan Cox  */
7327bfda801SAlan Cox void
73311752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
73411752d88SAlan Cox {
73511752d88SAlan Cox 	vm_page_t m_tmp;
73611752d88SAlan Cox 
73711752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
73811752d88SAlan Cox 		m_tmp->pool = pool;
73911752d88SAlan Cox }
74011752d88SAlan Cox 
74111752d88SAlan Cox /*
7429742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
7439742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
7449742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
7457bfda801SAlan Cox  *
7467bfda801SAlan Cox  * The free page queues must be locked.
7477bfda801SAlan Cox  */
748e35395ceSAlan Cox boolean_t
7497bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
7507bfda801SAlan Cox {
7517bfda801SAlan Cox 	struct vm_freelist *fl;
7527bfda801SAlan Cox 	struct vm_phys_seg *seg;
7537bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
7547bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
7557bfda801SAlan Cox 	int order;
7567bfda801SAlan Cox 
7577bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7587bfda801SAlan Cox 
7597bfda801SAlan Cox 	/*
7607bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
7617bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
7627bfda801SAlan Cox 	 * assign it to "m_set".
7637bfda801SAlan Cox 	 */
7647bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
7657bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
766bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
7677bfda801SAlan Cox 		order++;
7687bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
7692fbced65SAlan Cox 		if (pa >= seg->start)
7707bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
771e35395ceSAlan Cox 		else
772e35395ceSAlan Cox 			return (FALSE);
7737bfda801SAlan Cox 	}
774e35395ceSAlan Cox 	if (m_set->order < order)
775e35395ceSAlan Cox 		return (FALSE);
776e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
777e35395ceSAlan Cox 		return (FALSE);
7787bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
7797bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
7807bfda801SAlan Cox 	    m_set, m_set->order));
7817bfda801SAlan Cox 
7827bfda801SAlan Cox 	/*
7837bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
7847bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
7857bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
7867bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
7877bfda801SAlan Cox 	 */
7887bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
7897bfda801SAlan Cox 	order = m_set->order;
7907e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
7917bfda801SAlan Cox 	while (order > 0) {
7927bfda801SAlan Cox 		order--;
7937bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
7947bfda801SAlan Cox 		if (m->phys_addr < pa_half)
7957bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
7967bfda801SAlan Cox 		else {
7977bfda801SAlan Cox 			m_tmp = m_set;
7987bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
7997bfda801SAlan Cox 		}
8007e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
8017bfda801SAlan Cox 	}
8027bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
803e35395ceSAlan Cox 	return (TRUE);
8047bfda801SAlan Cox }
8057bfda801SAlan Cox 
8067bfda801SAlan Cox /*
8077bfda801SAlan Cox  * Try to zero one physical page.  Used by an idle priority thread.
80811752d88SAlan Cox  */
80911752d88SAlan Cox boolean_t
81011752d88SAlan Cox vm_phys_zero_pages_idle(void)
81111752d88SAlan Cox {
8127e226537SAttilio Rao 	static struct vm_freelist *fl;
8137bfda801SAlan Cox 	static int flind, oind, pind;
81411752d88SAlan Cox 	vm_page_t m, m_tmp;
8157e226537SAttilio Rao 	int domain;
81611752d88SAlan Cox 
8177e226537SAttilio Rao 	domain = vm_rr_selectdomain();
8187e226537SAttilio Rao 	fl = vm_phys_free_queues[domain][0][0];
81911752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
8207bfda801SAlan Cox 	for (;;) {
8217bfda801SAlan Cox 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
8227bfda801SAlan Cox 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
8237bfda801SAlan Cox 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
8247bfda801SAlan Cox 					vm_phys_unfree_page(m_tmp);
825*449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, -1);
82611752d88SAlan Cox 					mtx_unlock(&vm_page_queue_free_mtx);
82711752d88SAlan Cox 					pmap_zero_page_idle(m_tmp);
82811752d88SAlan Cox 					m_tmp->flags |= PG_ZERO;
82911752d88SAlan Cox 					mtx_lock(&vm_page_queue_free_mtx);
830*449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, 1);
8317bfda801SAlan Cox 					vm_phys_free_pages(m_tmp, 0);
8327bfda801SAlan Cox 					vm_page_zero_count++;
8337bfda801SAlan Cox 					cnt_prezero++;
83411752d88SAlan Cox 					return (TRUE);
83511752d88SAlan Cox 				}
83611752d88SAlan Cox 			}
83711752d88SAlan Cox 		}
8387bfda801SAlan Cox 		oind++;
8397bfda801SAlan Cox 		if (oind == VM_NFREEORDER) {
8407bfda801SAlan Cox 			oind = 0;
8417bfda801SAlan Cox 			pind++;
8427bfda801SAlan Cox 			if (pind == VM_NFREEPOOL) {
8437bfda801SAlan Cox 				pind = 0;
8447bfda801SAlan Cox 				flind++;
8457bfda801SAlan Cox 				if (flind == vm_nfreelists)
8467bfda801SAlan Cox 					flind = 0;
8477bfda801SAlan Cox 			}
8487e226537SAttilio Rao 			fl = vm_phys_free_queues[domain][flind][pind];
8497bfda801SAlan Cox 		}
8507bfda801SAlan Cox 	}
85111752d88SAlan Cox }
85211752d88SAlan Cox 
85311752d88SAlan Cox /*
8542f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
8552f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
8562f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
8572f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
8582f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
8592f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
8602f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
86111752d88SAlan Cox  * "alignment" and "boundary" must be a power of two.
86211752d88SAlan Cox  */
86311752d88SAlan Cox vm_page_t
8645c1f2cc4SAlan Cox vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
8655c1f2cc4SAlan Cox     u_long alignment, vm_paddr_t boundary)
86611752d88SAlan Cox {
86711752d88SAlan Cox 	struct vm_freelist *fl;
86811752d88SAlan Cox 	struct vm_phys_seg *seg;
86911752d88SAlan Cox 	vm_paddr_t pa, pa_last, size;
870fbd80bd0SAlan Cox 	vm_page_t m, m_ret;
8715c1f2cc4SAlan Cox 	u_long npages_end;
8727e226537SAttilio Rao 	int dom, domain, flind, oind, order, pind;
87311752d88SAlan Cox 
874fbd80bd0SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
87511752d88SAlan Cox 	size = npages << PAGE_SHIFT;
87611752d88SAlan Cox 	KASSERT(size != 0,
87711752d88SAlan Cox 	    ("vm_phys_alloc_contig: size must not be 0"));
87811752d88SAlan Cox 	KASSERT((alignment & (alignment - 1)) == 0,
87911752d88SAlan Cox 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
88011752d88SAlan Cox 	KASSERT((boundary & (boundary - 1)) == 0,
88111752d88SAlan Cox 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
88211752d88SAlan Cox 	/* Compute the queue that is the best fit for npages. */
88311752d88SAlan Cox 	for (order = 0; (1 << order) < npages; order++);
8847e226537SAttilio Rao 	dom = 0;
8857e226537SAttilio Rao restartdom:
8867e226537SAttilio Rao 	domain = vm_rr_selectdomain();
88711752d88SAlan Cox 	for (flind = 0; flind < vm_nfreelists; flind++) {
88811752d88SAlan Cox 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
88911752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
8907e226537SAttilio Rao 				fl = &vm_phys_free_queues[domain][flind][pind][0];
89111752d88SAlan Cox 				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
89211752d88SAlan Cox 					/*
89311752d88SAlan Cox 					 * A free list may contain physical pages
89411752d88SAlan Cox 					 * from one or more segments.
89511752d88SAlan Cox 					 */
89611752d88SAlan Cox 					seg = &vm_phys_segs[m_ret->segind];
89711752d88SAlan Cox 					if (seg->start > high ||
89811752d88SAlan Cox 					    low >= seg->end)
89911752d88SAlan Cox 						continue;
90011752d88SAlan Cox 
90111752d88SAlan Cox 					/*
90211752d88SAlan Cox 					 * Is the size of this allocation request
90311752d88SAlan Cox 					 * larger than the largest block size?
90411752d88SAlan Cox 					 */
90511752d88SAlan Cox 					if (order >= VM_NFREEORDER) {
90611752d88SAlan Cox 						/*
90711752d88SAlan Cox 						 * Determine if a sufficient number
90811752d88SAlan Cox 						 * of subsequent blocks to satisfy
90911752d88SAlan Cox 						 * the allocation request are free.
91011752d88SAlan Cox 						 */
91111752d88SAlan Cox 						pa = VM_PAGE_TO_PHYS(m_ret);
91211752d88SAlan Cox 						pa_last = pa + size;
91311752d88SAlan Cox 						for (;;) {
91411752d88SAlan Cox 							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
91511752d88SAlan Cox 							if (pa >= pa_last)
91611752d88SAlan Cox 								break;
91711752d88SAlan Cox 							if (pa < seg->start ||
91811752d88SAlan Cox 							    pa >= seg->end)
91911752d88SAlan Cox 								break;
92011752d88SAlan Cox 							m = &seg->first_page[atop(pa - seg->start)];
92111752d88SAlan Cox 							if (m->order != VM_NFREEORDER - 1)
92211752d88SAlan Cox 								break;
92311752d88SAlan Cox 						}
92411752d88SAlan Cox 						/* If not, continue to the next block. */
92511752d88SAlan Cox 						if (pa < pa_last)
92611752d88SAlan Cox 							continue;
92711752d88SAlan Cox 					}
92811752d88SAlan Cox 
92911752d88SAlan Cox 					/*
93011752d88SAlan Cox 					 * Determine if the blocks are within the given range,
93111752d88SAlan Cox 					 * satisfy the given alignment, and do not cross the
93211752d88SAlan Cox 					 * given boundary.
93311752d88SAlan Cox 					 */
93411752d88SAlan Cox 					pa = VM_PAGE_TO_PHYS(m_ret);
93511752d88SAlan Cox 					if (pa >= low &&
93611752d88SAlan Cox 					    pa + size <= high &&
93711752d88SAlan Cox 					    (pa & (alignment - 1)) == 0 &&
93811752d88SAlan Cox 					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
93911752d88SAlan Cox 						goto done;
94011752d88SAlan Cox 				}
94111752d88SAlan Cox 			}
94211752d88SAlan Cox 		}
94311752d88SAlan Cox 	}
9447e226537SAttilio Rao 	if (++dom < vm_ndomains)
9457e226537SAttilio Rao 		goto restartdom;
94611752d88SAlan Cox 	return (NULL);
94711752d88SAlan Cox done:
94811752d88SAlan Cox 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
94911752d88SAlan Cox 		fl = (*seg->free_queues)[m->pool];
9507e226537SAttilio Rao 		vm_freelist_rem(fl, m, m->order);
95111752d88SAlan Cox 	}
95211752d88SAlan Cox 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
95311752d88SAlan Cox 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
95411752d88SAlan Cox 	fl = (*seg->free_queues)[m_ret->pool];
95511752d88SAlan Cox 	vm_phys_split_pages(m_ret, oind, fl, order);
9565c1f2cc4SAlan Cox 	/* Return excess pages to the free lists. */
9575c1f2cc4SAlan Cox 	npages_end = roundup2(npages, 1 << imin(oind, order));
9585c1f2cc4SAlan Cox 	if (npages < npages_end)
9595c1f2cc4SAlan Cox 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
96011752d88SAlan Cox 	return (m_ret);
96111752d88SAlan Cox }
96211752d88SAlan Cox 
96311752d88SAlan Cox #ifdef DDB
96411752d88SAlan Cox /*
96511752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
96611752d88SAlan Cox  */
96711752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages)
96811752d88SAlan Cox {
96911752d88SAlan Cox 	struct vm_freelist *fl;
9707e226537SAttilio Rao 	int flind, oind, pind, dom;
97111752d88SAlan Cox 
9727e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
9737e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
97411752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
97511752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
97611752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
97711752d88SAlan Cox 			    "\n              ", flind);
97811752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
97911752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
98011752d88SAlan Cox 			db_printf("\n--            ");
98111752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
98211752d88SAlan Cox 				db_printf("-- --      ");
98311752d88SAlan Cox 			db_printf("--\n");
98411752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
98511752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
98611752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
98711752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
9887e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
98911752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
99011752d88SAlan Cox 				}
99111752d88SAlan Cox 				db_printf("\n");
99211752d88SAlan Cox 			}
99311752d88SAlan Cox 			db_printf("\n");
99411752d88SAlan Cox 		}
9957e226537SAttilio Rao 		db_printf("\n");
9967e226537SAttilio Rao 	}
99711752d88SAlan Cox }
99811752d88SAlan Cox #endif
999