xref: /freebsd/sys/vm/vm_phys.c (revision 38d6b2dcb2cca0609861a6d4b4df0a12e7eb99e1)
111752d88SAlan Cox /*-
211752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
311752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
411752d88SAlan Cox  * All rights reserved.
511752d88SAlan Cox  *
611752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
711752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
811752d88SAlan Cox  *
911752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1011752d88SAlan Cox  * modification, are permitted provided that the following conditions
1111752d88SAlan Cox  * are met:
1211752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1311752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1411752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1611752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1711752d88SAlan Cox  *
1811752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1911752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2011752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2111752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2211752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2311752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2411752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2511752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2611752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2711752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
2811752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2911752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3011752d88SAlan Cox  */
3111752d88SAlan Cox 
32fbd80bd0SAlan Cox /*
33fbd80bd0SAlan Cox  *	Physical memory system implementation
34fbd80bd0SAlan Cox  *
35fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
36fbd80bd0SAlan Cox  * virtual memory system.
37fbd80bd0SAlan Cox  */
38fbd80bd0SAlan Cox 
3911752d88SAlan Cox #include <sys/cdefs.h>
4011752d88SAlan Cox __FBSDID("$FreeBSD$");
4111752d88SAlan Cox 
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
4711752d88SAlan Cox #include <sys/lock.h>
4811752d88SAlan Cox #include <sys/kernel.h>
4911752d88SAlan Cox #include <sys/malloc.h>
5011752d88SAlan Cox #include <sys/mutex.h>
517e226537SAttilio Rao #if MAXMEMDOM > 1
527e226537SAttilio Rao #include <sys/proc.h>
537e226537SAttilio Rao #endif
5411752d88SAlan Cox #include <sys/queue.h>
55*38d6b2dcSRoger Pau Monné #include <sys/rwlock.h>
5611752d88SAlan Cox #include <sys/sbuf.h>
5711752d88SAlan Cox #include <sys/sysctl.h>
58*38d6b2dcSRoger Pau Monné #include <sys/tree.h>
5911752d88SAlan Cox #include <sys/vmmeter.h>
6011752d88SAlan Cox 
6111752d88SAlan Cox #include <ddb/ddb.h>
6211752d88SAlan Cox 
6311752d88SAlan Cox #include <vm/vm.h>
6411752d88SAlan Cox #include <vm/vm_param.h>
6511752d88SAlan Cox #include <vm/vm_kern.h>
6611752d88SAlan Cox #include <vm/vm_object.h>
6711752d88SAlan Cox #include <vm/vm_page.h>
6811752d88SAlan Cox #include <vm/vm_phys.h>
6911752d88SAlan Cox 
70449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
71449c2e92SKonstantin Belousov     "Too many physsegs.");
7211752d88SAlan Cox 
73a3870a18SJohn Baldwin struct mem_affinity *mem_affinity;
74a3870a18SJohn Baldwin 
757e226537SAttilio Rao int vm_ndomains = 1;
767e226537SAttilio Rao 
77449c2e92SKonstantin Belousov struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
78449c2e92SKonstantin Belousov int vm_phys_nsegs;
7911752d88SAlan Cox 
80*38d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg;
81*38d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
82*38d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *);
83*38d6b2dcSRoger Pau Monné 
84*38d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
85*38d6b2dcSRoger Pau Monné     RB_INITIALIZER(_vm_phys_fictitious_tree);
86*38d6b2dcSRoger Pau Monné 
struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;	/* Linkage in fict_tree. */
	/* Memory region data */
	vm_paddr_t	start;		/* First physical address. */
	vm_paddr_t	end;		/* End of range (exclusive); 0 marks a
					   single-page lookup key. */
	vm_page_t	first_page;	/* Fake vm_page array for the range. */
};
94*38d6b2dcSRoger Pau Monné 
95*38d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
96*38d6b2dcSRoger Pau Monné     vm_phys_fictitious_cmp);
97*38d6b2dcSRoger Pau Monné 
98*38d6b2dcSRoger Pau Monné static struct rwlock vm_phys_fictitious_reg_lock;
99c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
100b6de32bdSKonstantin Belousov 
10111752d88SAlan Cox static struct vm_freelist
1027e226537SAttilio Rao     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
10311752d88SAlan Cox 
10411752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
10511752d88SAlan Cox 
10611752d88SAlan Cox static int cnt_prezero;
10711752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
10811752d88SAlan Cox     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
10911752d88SAlan Cox 
11011752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
11111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
11211752d88SAlan Cox     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
11311752d88SAlan Cox 
11411752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
11511752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
11611752d88SAlan Cox     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
11711752d88SAlan Cox 
1187e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
1197e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
120a3870a18SJohn Baldwin 
121f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
122f5c4b077SJohn Baldwin     int order);
123a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
124a3870a18SJohn Baldwin     int domain);
12511752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
12611752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa);
12711752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
12811752d88SAlan Cox     int order);
12911752d88SAlan Cox 
130*38d6b2dcSRoger Pau Monné /*
131*38d6b2dcSRoger Pau Monné  * Red-black tree helpers for vm fictitious range management.
132*38d6b2dcSRoger Pau Monné  */
133*38d6b2dcSRoger Pau Monné static inline int
134*38d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
135*38d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *range)
136*38d6b2dcSRoger Pau Monné {
137*38d6b2dcSRoger Pau Monné 
138*38d6b2dcSRoger Pau Monné 	KASSERT(range->start != 0 && range->end != 0,
139*38d6b2dcSRoger Pau Monné 	    ("Invalid range passed on search for vm_fictitious page"));
140*38d6b2dcSRoger Pau Monné 	if (p->start >= range->end)
141*38d6b2dcSRoger Pau Monné 		return (1);
142*38d6b2dcSRoger Pau Monné 	if (p->start < range->start)
143*38d6b2dcSRoger Pau Monné 		return (-1);
144*38d6b2dcSRoger Pau Monné 
145*38d6b2dcSRoger Pau Monné 	return (0);
146*38d6b2dcSRoger Pau Monné }
147*38d6b2dcSRoger Pau Monné 
/*
 * RB-tree comparator for fictitious segments.  Serves double duty:
 * when p1 is a lookup key (end == 0, as built by the lookup/unregister
 * paths) it degenerates to a point-in-range test; otherwise both
 * arguments are real ranges being ordered for insertion, and any
 * overlap between registered ranges is a fatal error.
 */
static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	/* The in-tree element must always be a real range. */
	KASSERT(p2->end != 0,
    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}
170*38d6b2dcSRoger Pau Monné 
/*
 * Pick the next memory domain for this thread in round-robin order.
 * With a single domain the answer is always 0.
 */
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;
	/* Advance this thread's cursor, wrapping at vm_ndomains. */
	td->td_dom_rr_idx = (td->td_dom_rr_idx + 1) % vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
1867e226537SAttilio Rao 
187449c2e92SKonstantin Belousov boolean_t
188449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
189449c2e92SKonstantin Belousov {
190449c2e92SKonstantin Belousov 	struct vm_phys_seg *s;
191449c2e92SKonstantin Belousov 	int idx;
192449c2e92SKonstantin Belousov 
193449c2e92SKonstantin Belousov 	while ((idx = ffsl(mask)) != 0) {
194449c2e92SKonstantin Belousov 		idx--;	/* ffsl counts from 1 */
195449c2e92SKonstantin Belousov 		mask &= ~(1UL << idx);
196449c2e92SKonstantin Belousov 		s = &vm_phys_segs[idx];
197449c2e92SKonstantin Belousov 		if (low < s->end && high > s->start)
198449c2e92SKonstantin Belousov 			return (TRUE);
199449c2e92SKonstantin Belousov 	}
200449c2e92SKonstantin Belousov 	return (FALSE);
201449c2e92SKonstantin Belousov }
202449c2e92SKonstantin Belousov 
20311752d88SAlan Cox /*
20411752d88SAlan Cox  * Outputs the state of the physical memory allocator, specifically,
20511752d88SAlan Cox  * the amount of physical memory in each free list.
20611752d88SAlan Cox  */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	/* Budget roughly 128 bytes of output per domain. */
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			/* Emit the table header: one column per pool. */
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			/* One row per order, largest first. */
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
24611752d88SAlan Cox 
24711752d88SAlan Cox /*
24811752d88SAlan Cox  * Outputs the set of physical memory segments.
24911752d88SAlan Cox  */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	/* Dump each registered segment's bounds, domain and queues. */
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
27511752d88SAlan Cox 
2767e226537SAttilio Rao static void
2777e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
278a3870a18SJohn Baldwin {
279a3870a18SJohn Baldwin 
2807e226537SAttilio Rao 	m->order = order;
2817e226537SAttilio Rao 	if (tail)
282c325e866SKonstantin Belousov 		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
2837e226537SAttilio Rao 	else
284c325e866SKonstantin Belousov 		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
2857e226537SAttilio Rao 	fl[order].lcnt++;
286a3870a18SJohn Baldwin }
2877e226537SAttilio Rao 
2887e226537SAttilio Rao static void
2897e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
2907e226537SAttilio Rao {
2917e226537SAttilio Rao 
292c325e866SKonstantin Belousov 	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
2937e226537SAttilio Rao 	fl[order].lcnt--;
2947e226537SAttilio Rao 	m->order = VM_NFREEORDER;
295a3870a18SJohn Baldwin }
296a3870a18SJohn Baldwin 
29711752d88SAlan Cox /*
29811752d88SAlan Cox  * Create a physical memory segment.
29911752d88SAlan Cox  */
30011752d88SAlan Cox static void
301a3870a18SJohn Baldwin _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
30211752d88SAlan Cox {
30311752d88SAlan Cox 	struct vm_phys_seg *seg;
30411752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE
305d6e9b97bSJohn Baldwin 	long pages;
30611752d88SAlan Cox 	int segind;
30711752d88SAlan Cox 
30811752d88SAlan Cox 	pages = 0;
30911752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
31011752d88SAlan Cox 		seg = &vm_phys_segs[segind];
31111752d88SAlan Cox 		pages += atop(seg->end - seg->start);
31211752d88SAlan Cox 	}
31311752d88SAlan Cox #endif
31411752d88SAlan Cox 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
31511752d88SAlan Cox 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
3167e226537SAttilio Rao 	KASSERT(domain < vm_ndomains,
3177e226537SAttilio Rao 	    ("vm_phys_create_seg: invalid domain provided"));
31811752d88SAlan Cox 	seg = &vm_phys_segs[vm_phys_nsegs++];
31911752d88SAlan Cox 	seg->start = start;
32011752d88SAlan Cox 	seg->end = end;
321a3870a18SJohn Baldwin 	seg->domain = domain;
32211752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE
32311752d88SAlan Cox 	seg->first_page = &vm_page_array[pages];
32411752d88SAlan Cox #else
32511752d88SAlan Cox 	seg->first_page = PHYS_TO_VM_PAGE(start);
32611752d88SAlan Cox #endif
3277e226537SAttilio Rao 	seg->free_queues = &vm_phys_free_queues[domain][flind];
32811752d88SAlan Cox }
32911752d88SAlan Cox 
330a3870a18SJohn Baldwin static void
331a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
332a3870a18SJohn Baldwin {
333a3870a18SJohn Baldwin 	int i;
334a3870a18SJohn Baldwin 
335a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
336a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, end, flind, 0);
337a3870a18SJohn Baldwin 		return;
338a3870a18SJohn Baldwin 	}
339a3870a18SJohn Baldwin 
340a3870a18SJohn Baldwin 	for (i = 0;; i++) {
341a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
342a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
343a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
344a3870a18SJohn Baldwin 			continue;
345a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
346a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
347a3870a18SJohn Baldwin 			    (uintmax_t)start);
348a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
349a3870a18SJohn Baldwin 			_vm_phys_create_seg(start, end, flind,
350a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
351a3870a18SJohn Baldwin 			break;
352a3870a18SJohn Baldwin 		}
353a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
354a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
355a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
356a3870a18SJohn Baldwin 	}
357a3870a18SJohn Baldwin }
358a3870a18SJohn Baldwin 
35911752d88SAlan Cox /*
36011752d88SAlan Cox  * Initialize the physical memory allocator.
36111752d88SAlan Cox  */
/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int dom, flind, i, oind, pind;

	/*
	 * Carve each phys_avail[] chunk into segments, routing memory
	 * below 16 MB to the ISA-DMA freelist and memory above
	 * VM_HIGHMEM_ADDRESS to the highmem freelist when those lists
	 * are configured.  Note: the "} else" before each #endif chains
	 * into the next conditional/statement, so at most one of the
	 * three vm_phys_create_seg() paths runs per chunk.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				/* Chunk straddles 16 MB: split it. */
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				/* Chunk straddles the highmem boundary. */
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	/* Initialize every buddy queue in every domain/freelist/pool. */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}
	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
41311752d88SAlan Cox 
41411752d88SAlan Cox /*
41511752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
41611752d88SAlan Cox  */
41711752d88SAlan Cox static __inline void
41811752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
41911752d88SAlan Cox {
42011752d88SAlan Cox 	vm_page_t m_buddy;
42111752d88SAlan Cox 
42211752d88SAlan Cox 	while (oind > order) {
42311752d88SAlan Cox 		oind--;
42411752d88SAlan Cox 		m_buddy = &m[1 << oind];
42511752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
42611752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
42711752d88SAlan Cox 		    m_buddy, m_buddy->order));
4287e226537SAttilio Rao 		vm_freelist_add(fl, m_buddy, oind, 0);
42911752d88SAlan Cox         }
43011752d88SAlan Cox }
43111752d88SAlan Cox 
43211752d88SAlan Cox /*
43311752d88SAlan Cox  * Initialize a physical page and add it to the free lists.
43411752d88SAlan Cox  */
/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	vm_cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	/* Account the page to its domain and mark its segment in use. */
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	/* vm_phys_free_pages() requires the free queue lock. */
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}
45911752d88SAlan Cox 
46011752d88SAlan Cox /*
46111752d88SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages
46211752d88SAlan Cox  * from the free lists.
4638941dc44SAlan Cox  *
4648941dc44SAlan Cox  * The free page queues must be locked.
46511752d88SAlan Cox  */
46611752d88SAlan Cox vm_page_t
46711752d88SAlan Cox vm_phys_alloc_pages(int pool, int order)
46811752d88SAlan Cox {
46949ca10d4SJayachandran C. 	vm_page_t m;
4707e226537SAttilio Rao 	int dom, domain, flind;
47149ca10d4SJayachandran C. 
472f5c4b077SJohn Baldwin 	KASSERT(pool < VM_NFREEPOOL,
473f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
474f5c4b077SJohn Baldwin 	KASSERT(order < VM_NFREEORDER,
475f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: order %d is out of range", order));
476f5c4b077SJohn Baldwin 
4777e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4787e226537SAttilio Rao 		domain = vm_rr_selectdomain();
47949ca10d4SJayachandran C. 		for (flind = 0; flind < vm_nfreelists; flind++) {
4807e226537SAttilio Rao 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
4817e226537SAttilio Rao 			    order);
48249ca10d4SJayachandran C. 			if (m != NULL)
48349ca10d4SJayachandran C. 				return (m);
48449ca10d4SJayachandran C. 		}
4857e226537SAttilio Rao 	}
48649ca10d4SJayachandran C. 	return (NULL);
48749ca10d4SJayachandran C. }
48849ca10d4SJayachandran C. 
48949ca10d4SJayachandran C. /*
49049ca10d4SJayachandran C.  * Find and dequeue a free page on the given free list, with the
49149ca10d4SJayachandran C.  * specified pool and order
49249ca10d4SJayachandran C.  */
49349ca10d4SJayachandran C. vm_page_t
49449ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order)
49549ca10d4SJayachandran C. {
49611752d88SAlan Cox 	vm_page_t m;
4977e226537SAttilio Rao 	int dom, domain;
49811752d88SAlan Cox 
49949ca10d4SJayachandran C. 	KASSERT(flind < VM_NFREELIST,
50049ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
50111752d88SAlan Cox 	KASSERT(pool < VM_NFREEPOOL,
50249ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
50311752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
50449ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
505a3870a18SJohn Baldwin 
5067e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
5077e226537SAttilio Rao 		domain = vm_rr_selectdomain();
5087e226537SAttilio Rao 		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
509f5c4b077SJohn Baldwin 		if (m != NULL)
510f5c4b077SJohn Baldwin 			return (m);
5117e226537SAttilio Rao 	}
5127e226537SAttilio Rao 	return (NULL);
513f5c4b077SJohn Baldwin }
514f5c4b077SJohn Baldwin 
515f5c4b077SJohn Baldwin static vm_page_t
516f5c4b077SJohn Baldwin vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
517f5c4b077SJohn Baldwin {
518f5c4b077SJohn Baldwin 	struct vm_freelist *fl;
519f5c4b077SJohn Baldwin 	struct vm_freelist *alt;
520f5c4b077SJohn Baldwin 	int oind, pind;
521f5c4b077SJohn Baldwin 	vm_page_t m;
522f5c4b077SJohn Baldwin 
52311752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
5247e226537SAttilio Rao 	fl = &vm_phys_free_queues[domain][flind][pool][0];
52511752d88SAlan Cox 	for (oind = order; oind < VM_NFREEORDER; oind++) {
52611752d88SAlan Cox 		m = TAILQ_FIRST(&fl[oind].pl);
52711752d88SAlan Cox 		if (m != NULL) {
5287e226537SAttilio Rao 			vm_freelist_rem(fl, m, oind);
52911752d88SAlan Cox 			vm_phys_split_pages(m, oind, fl, order);
53011752d88SAlan Cox 			return (m);
53111752d88SAlan Cox 		}
53211752d88SAlan Cox 	}
53311752d88SAlan Cox 
53411752d88SAlan Cox 	/*
53511752d88SAlan Cox 	 * The given pool was empty.  Find the largest
53611752d88SAlan Cox 	 * contiguous, power-of-two-sized set of pages in any
53711752d88SAlan Cox 	 * pool.  Transfer these pages to the given pool, and
53811752d88SAlan Cox 	 * use them to satisfy the allocation.
53911752d88SAlan Cox 	 */
54011752d88SAlan Cox 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
54111752d88SAlan Cox 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
5427e226537SAttilio Rao 			alt = &vm_phys_free_queues[domain][flind][pind][0];
54311752d88SAlan Cox 			m = TAILQ_FIRST(&alt[oind].pl);
54411752d88SAlan Cox 			if (m != NULL) {
5457e226537SAttilio Rao 				vm_freelist_rem(alt, m, oind);
54611752d88SAlan Cox 				vm_phys_set_pool(pool, m, oind);
54711752d88SAlan Cox 				vm_phys_split_pages(m, oind, fl, order);
54811752d88SAlan Cox 				return (m);
54911752d88SAlan Cox 			}
55011752d88SAlan Cox 		}
55111752d88SAlan Cox 	}
55211752d88SAlan Cox 	return (NULL);
55311752d88SAlan Cox }
55411752d88SAlan Cox 
55511752d88SAlan Cox /*
55611752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
55711752d88SAlan Cox  */
55811752d88SAlan Cox vm_page_t
55911752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
56011752d88SAlan Cox {
56111752d88SAlan Cox 	struct vm_phys_seg *seg;
56211752d88SAlan Cox 	int segind;
56311752d88SAlan Cox 
56411752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
56511752d88SAlan Cox 		seg = &vm_phys_segs[segind];
56611752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
56711752d88SAlan Cox 			return (&seg->first_page[atop(pa - seg->start)]);
56811752d88SAlan Cox 	}
569f06a3a36SAndrew Thompson 	return (NULL);
57011752d88SAlan Cox }
57111752d88SAlan Cox 
572b6de32bdSKonstantin Belousov vm_page_t
573b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
574b6de32bdSKonstantin Belousov {
575*38d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg tmp, *seg;
576b6de32bdSKonstantin Belousov 	vm_page_t m;
577b6de32bdSKonstantin Belousov 
578b6de32bdSKonstantin Belousov 	m = NULL;
579*38d6b2dcSRoger Pau Monné 	tmp.start = pa;
580*38d6b2dcSRoger Pau Monné 	tmp.end = 0;
581*38d6b2dcSRoger Pau Monné 
582*38d6b2dcSRoger Pau Monné 	rw_rlock(&vm_phys_fictitious_reg_lock);
583*38d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
584*38d6b2dcSRoger Pau Monné 	rw_runlock(&vm_phys_fictitious_reg_lock);
585*38d6b2dcSRoger Pau Monné 	if (seg == NULL)
586*38d6b2dcSRoger Pau Monné 		return (NULL);
587*38d6b2dcSRoger Pau Monné 
588b6de32bdSKonstantin Belousov 	m = &seg->first_page[atop(pa - seg->start)];
589*38d6b2dcSRoger Pau Monné 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
590*38d6b2dcSRoger Pau Monné 
591b6de32bdSKonstantin Belousov 	return (m);
592b6de32bdSKonstantin Belousov }
593b6de32bdSKonstantin Belousov 
/*
 * Register [start, end) as a fictitious range: initialize a fake
 * vm_page for each page in the range and insert the segment into the
 * lookup tree.  Returns 0 on success or EINVAL when a dense
 * vm_page_array cannot cover the requested end.
 */
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	/*
	 * If the range starts inside the dense vm_page_array, reuse
	 * those preallocated pages instead of allocating new ones; the
	 * whole range must then fit in the array.
	 */
	pi = atop(start);
	if (pi >= first_page && pi < vm_page_array_size + first_page) {
		if (atop(end) >= vm_page_array_size + first_page)
			return (EINVAL);
		fp = &vm_page_array[pi - first_page];
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
	}
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		/* Fictitious pages here are managed and unbusied. */
		fp[i].oflags &= ~VPO_UNMANAGED;
		fp[i].busy_lock = VPB_UNBUSIED;
	}

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	/* Publish the segment; the comparator panics on overlap. */
	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}
636b6de32bdSKonstantin Belousov 
637b6de32bdSKonstantin Belousov void
638b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
639b6de32bdSKonstantin Belousov {
640*38d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg *seg, tmp;
641b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
642b6de32bdSKonstantin Belousov 	long pi;
643b6de32bdSKonstantin Belousov #endif
644b6de32bdSKonstantin Belousov 
645b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
646b6de32bdSKonstantin Belousov 	pi = atop(start);
647b6de32bdSKonstantin Belousov #endif
648*38d6b2dcSRoger Pau Monné 	tmp.start = start;
649*38d6b2dcSRoger Pau Monné 	tmp.end = 0;
650b6de32bdSKonstantin Belousov 
651*38d6b2dcSRoger Pau Monné 	rw_wlock(&vm_phys_fictitious_reg_lock);
652*38d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
653*38d6b2dcSRoger Pau Monné 	if (seg->start != start || seg->end != end) {
654*38d6b2dcSRoger Pau Monné 		rw_wunlock(&vm_phys_fictitious_reg_lock);
655*38d6b2dcSRoger Pau Monné 		panic(
656*38d6b2dcSRoger Pau Monné 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
657*38d6b2dcSRoger Pau Monné 		    (uintmax_t)start, (uintmax_t)end);
658*38d6b2dcSRoger Pau Monné 	}
659*38d6b2dcSRoger Pau Monné 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
660*38d6b2dcSRoger Pau Monné 	rw_wunlock(&vm_phys_fictitious_reg_lock);
661b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
662b6de32bdSKonstantin Belousov 	if (pi < first_page || atop(end) >= vm_page_array_size)
663b6de32bdSKonstantin Belousov #endif
664*38d6b2dcSRoger Pau Monné 		free(seg->first_page, M_FICT_PAGES);
665*38d6b2dcSRoger Pau Monné 	free(seg, M_FICT_PAGES);
666b6de32bdSKonstantin Belousov }
667b6de32bdSKonstantin Belousov 
66811752d88SAlan Cox /*
66911752d88SAlan Cox  * Find the segment containing the given physical address.
67011752d88SAlan Cox  */
67111752d88SAlan Cox static int
67211752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa)
67311752d88SAlan Cox {
67411752d88SAlan Cox 	struct vm_phys_seg *seg;
67511752d88SAlan Cox 	int segind;
67611752d88SAlan Cox 
67711752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
67811752d88SAlan Cox 		seg = &vm_phys_segs[segind];
67911752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
68011752d88SAlan Cox 			return (segind);
68111752d88SAlan Cox 	}
68211752d88SAlan Cox 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
68311752d88SAlan Cox 	    (uintmax_t)pa);
68411752d88SAlan Cox }
68511752d88SAlan Cox 
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * "m" is the first page of a naturally aligned block of 2^order pages
 * whose pages all carry order VM_NFREEORDER (i.e., are not free).
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		/*
		 * Buddy coalescing: repeatedly try to merge the block being
		 * freed with its equal-sized, naturally aligned neighbor
		 * ("buddy") to form a free block of the next larger order.
		 */
		pa = VM_PAGE_TO_PHYS(m);
		do {
			/* The buddy differs only in bit (PAGE_SHIFT + order). */
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			/* Stop if the buddy lies outside this segment. */
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			/* Stop unless the buddy is free and the same size. */
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			/* Keep the merged block in a single pool. */
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			/* Round down to the start of the merged block. */
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}
73011752d88SAlan Cox 
73111752d88SAlan Cox /*
7325c1f2cc4SAlan Cox  * Free a contiguous, arbitrarily sized set of physical pages.
7335c1f2cc4SAlan Cox  *
7345c1f2cc4SAlan Cox  * The free page queues must be locked.
7355c1f2cc4SAlan Cox  */
7365c1f2cc4SAlan Cox void
7375c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages)
7385c1f2cc4SAlan Cox {
7395c1f2cc4SAlan Cox 	u_int n;
7405c1f2cc4SAlan Cox 	int order;
7415c1f2cc4SAlan Cox 
7425c1f2cc4SAlan Cox 	/*
7435c1f2cc4SAlan Cox 	 * Avoid unnecessary coalescing by freeing the pages in the largest
7445c1f2cc4SAlan Cox 	 * possible power-of-two-sized subsets.
7455c1f2cc4SAlan Cox 	 */
7465c1f2cc4SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7475c1f2cc4SAlan Cox 	for (;; npages -= n) {
7485c1f2cc4SAlan Cox 		/*
7495c1f2cc4SAlan Cox 		 * Unsigned "min" is used here so that "order" is assigned
7505c1f2cc4SAlan Cox 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
7515c1f2cc4SAlan Cox 		 * or the low-order bits of its physical address are zero
7525c1f2cc4SAlan Cox 		 * because the size of a physical address exceeds the size of
7535c1f2cc4SAlan Cox 		 * a long.
7545c1f2cc4SAlan Cox 		 */
7555c1f2cc4SAlan Cox 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
7565c1f2cc4SAlan Cox 		    VM_NFREEORDER - 1);
7575c1f2cc4SAlan Cox 		n = 1 << order;
7585c1f2cc4SAlan Cox 		if (npages < n)
7595c1f2cc4SAlan Cox 			break;
7605c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7615c1f2cc4SAlan Cox 		m += n;
7625c1f2cc4SAlan Cox 	}
7635c1f2cc4SAlan Cox 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
7645c1f2cc4SAlan Cox 	for (; npages > 0; npages -= n) {
7655c1f2cc4SAlan Cox 		order = flsl(npages) - 1;
7665c1f2cc4SAlan Cox 		n = 1 << order;
7675c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7685c1f2cc4SAlan Cox 		m += n;
7695c1f2cc4SAlan Cox 	}
7705c1f2cc4SAlan Cox }
7715c1f2cc4SAlan Cox 
7725c1f2cc4SAlan Cox /*
77311752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
77411752d88SAlan Cox  */
7757bfda801SAlan Cox void
77611752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
77711752d88SAlan Cox {
77811752d88SAlan Cox 	vm_page_t m_tmp;
77911752d88SAlan Cox 
78011752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
78111752d88SAlan Cox 		m_tmp->pool = pool;
78211752d88SAlan Cox }
78311752d88SAlan Cox 
78411752d88SAlan Cox /*
7859742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
7869742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
7879742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
7887bfda801SAlan Cox  *
7897bfda801SAlan Cox  * The free page queues must be locked.
7907bfda801SAlan Cox  */
791e35395ceSAlan Cox boolean_t
7927bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
7937bfda801SAlan Cox {
7947bfda801SAlan Cox 	struct vm_freelist *fl;
7957bfda801SAlan Cox 	struct vm_phys_seg *seg;
7967bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
7977bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
7987bfda801SAlan Cox 	int order;
7997bfda801SAlan Cox 
8007bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
8017bfda801SAlan Cox 
8027bfda801SAlan Cox 	/*
8037bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
8047bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
8057bfda801SAlan Cox 	 * assign it to "m_set".
8067bfda801SAlan Cox 	 */
8077bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
8087bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
809bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
8107bfda801SAlan Cox 		order++;
8117bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
8122fbced65SAlan Cox 		if (pa >= seg->start)
8137bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
814e35395ceSAlan Cox 		else
815e35395ceSAlan Cox 			return (FALSE);
8167bfda801SAlan Cox 	}
817e35395ceSAlan Cox 	if (m_set->order < order)
818e35395ceSAlan Cox 		return (FALSE);
819e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
820e35395ceSAlan Cox 		return (FALSE);
8217bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
8227bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
8237bfda801SAlan Cox 	    m_set, m_set->order));
8247bfda801SAlan Cox 
8257bfda801SAlan Cox 	/*
8267bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
8277bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
8287bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
8297bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
8307bfda801SAlan Cox 	 */
8317bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
8327bfda801SAlan Cox 	order = m_set->order;
8337e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
8347bfda801SAlan Cox 	while (order > 0) {
8357bfda801SAlan Cox 		order--;
8367bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
8377bfda801SAlan Cox 		if (m->phys_addr < pa_half)
8387bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
8397bfda801SAlan Cox 		else {
8407bfda801SAlan Cox 			m_tmp = m_set;
8417bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
8427bfda801SAlan Cox 		}
8437e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
8447bfda801SAlan Cox 	}
8457bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
846e35395ceSAlan Cox 	return (TRUE);
8477bfda801SAlan Cox }
8487bfda801SAlan Cox 
/*
 * Try to zero one physical page.  Used by an idle priority thread.
 *
 * Returns TRUE after zeroing one page.  The static "flind"/"oind"/"pind"
 * variables persist the scan position across calls so successive calls
 * resume where the previous one left off.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl;
	static int flind, oind, pind;
	vm_page_t m, m_tmp;
	int domain;

	domain = vm_rr_selectdomain();
	fl = vm_phys_free_queues[domain][0][0];
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		/* Scan the current queue for a page not yet cached or zeroed. */
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					/*
					 * Pull the page out of the free lists so
					 * it cannot be allocated while the free
					 * queue lock is dropped for zeroing.
					 *
					 * NOTE(review): the count adjustments
					 * below use "m" while the page removed
					 * is "m_tmp"; both lie in the same
					 * 2^oind block, so presumably the same
					 * domain — confirm this is intended.
					 */
					vm_phys_unfree_page(m_tmp);
					vm_phys_freecnt_adj(m, -1);
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					vm_phys_freecnt_adj(m, 1);
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		/* Advance the persistent scan position: order, pool, freelist. */
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[domain][flind][pind];
		}
	}
}
89511752d88SAlan Cox 
89611752d88SAlan Cox /*
8972f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
8982f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
8992f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
9002f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
9012f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
9022f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
9032f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
90411752d88SAlan Cox  * "alignment" and "boundary" must be a power of two.
90511752d88SAlan Cox  */
90611752d88SAlan Cox vm_page_t
9075c1f2cc4SAlan Cox vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
9085c1f2cc4SAlan Cox     u_long alignment, vm_paddr_t boundary)
90911752d88SAlan Cox {
91011752d88SAlan Cox 	struct vm_freelist *fl;
91111752d88SAlan Cox 	struct vm_phys_seg *seg;
91211752d88SAlan Cox 	vm_paddr_t pa, pa_last, size;
913fbd80bd0SAlan Cox 	vm_page_t m, m_ret;
9145c1f2cc4SAlan Cox 	u_long npages_end;
9157e226537SAttilio Rao 	int dom, domain, flind, oind, order, pind;
91611752d88SAlan Cox 
917fbd80bd0SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
91811752d88SAlan Cox 	size = npages << PAGE_SHIFT;
91911752d88SAlan Cox 	KASSERT(size != 0,
92011752d88SAlan Cox 	    ("vm_phys_alloc_contig: size must not be 0"));
92111752d88SAlan Cox 	KASSERT((alignment & (alignment - 1)) == 0,
92211752d88SAlan Cox 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
92311752d88SAlan Cox 	KASSERT((boundary & (boundary - 1)) == 0,
92411752d88SAlan Cox 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
92511752d88SAlan Cox 	/* Compute the queue that is the best fit for npages. */
92611752d88SAlan Cox 	for (order = 0; (1 << order) < npages; order++);
9277e226537SAttilio Rao 	dom = 0;
9287e226537SAttilio Rao restartdom:
9297e226537SAttilio Rao 	domain = vm_rr_selectdomain();
93011752d88SAlan Cox 	for (flind = 0; flind < vm_nfreelists; flind++) {
93111752d88SAlan Cox 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
93211752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
9337e226537SAttilio Rao 				fl = &vm_phys_free_queues[domain][flind][pind][0];
934c325e866SKonstantin Belousov 				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
93511752d88SAlan Cox 					/*
93611752d88SAlan Cox 					 * A free list may contain physical pages
93711752d88SAlan Cox 					 * from one or more segments.
93811752d88SAlan Cox 					 */
93911752d88SAlan Cox 					seg = &vm_phys_segs[m_ret->segind];
94011752d88SAlan Cox 					if (seg->start > high ||
94111752d88SAlan Cox 					    low >= seg->end)
94211752d88SAlan Cox 						continue;
94311752d88SAlan Cox 
94411752d88SAlan Cox 					/*
94511752d88SAlan Cox 					 * Is the size of this allocation request
94611752d88SAlan Cox 					 * larger than the largest block size?
94711752d88SAlan Cox 					 */
94811752d88SAlan Cox 					if (order >= VM_NFREEORDER) {
94911752d88SAlan Cox 						/*
95011752d88SAlan Cox 						 * Determine if a sufficient number
95111752d88SAlan Cox 						 * of subsequent blocks to satisfy
95211752d88SAlan Cox 						 * the allocation request are free.
95311752d88SAlan Cox 						 */
95411752d88SAlan Cox 						pa = VM_PAGE_TO_PHYS(m_ret);
95511752d88SAlan Cox 						pa_last = pa + size;
95611752d88SAlan Cox 						for (;;) {
95711752d88SAlan Cox 							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
95811752d88SAlan Cox 							if (pa >= pa_last)
95911752d88SAlan Cox 								break;
96011752d88SAlan Cox 							if (pa < seg->start ||
96111752d88SAlan Cox 							    pa >= seg->end)
96211752d88SAlan Cox 								break;
96311752d88SAlan Cox 							m = &seg->first_page[atop(pa - seg->start)];
96411752d88SAlan Cox 							if (m->order != VM_NFREEORDER - 1)
96511752d88SAlan Cox 								break;
96611752d88SAlan Cox 						}
96711752d88SAlan Cox 						/* If not, continue to the next block. */
96811752d88SAlan Cox 						if (pa < pa_last)
96911752d88SAlan Cox 							continue;
97011752d88SAlan Cox 					}
97111752d88SAlan Cox 
97211752d88SAlan Cox 					/*
97311752d88SAlan Cox 					 * Determine if the blocks are within the given range,
97411752d88SAlan Cox 					 * satisfy the given alignment, and do not cross the
97511752d88SAlan Cox 					 * given boundary.
97611752d88SAlan Cox 					 */
97711752d88SAlan Cox 					pa = VM_PAGE_TO_PHYS(m_ret);
97811752d88SAlan Cox 					if (pa >= low &&
97911752d88SAlan Cox 					    pa + size <= high &&
98011752d88SAlan Cox 					    (pa & (alignment - 1)) == 0 &&
98111752d88SAlan Cox 					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
98211752d88SAlan Cox 						goto done;
98311752d88SAlan Cox 				}
98411752d88SAlan Cox 			}
98511752d88SAlan Cox 		}
98611752d88SAlan Cox 	}
9877e226537SAttilio Rao 	if (++dom < vm_ndomains)
9887e226537SAttilio Rao 		goto restartdom;
98911752d88SAlan Cox 	return (NULL);
99011752d88SAlan Cox done:
99111752d88SAlan Cox 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
99211752d88SAlan Cox 		fl = (*seg->free_queues)[m->pool];
9937e226537SAttilio Rao 		vm_freelist_rem(fl, m, m->order);
99411752d88SAlan Cox 	}
99511752d88SAlan Cox 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
99611752d88SAlan Cox 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
99711752d88SAlan Cox 	fl = (*seg->free_queues)[m_ret->pool];
99811752d88SAlan Cox 	vm_phys_split_pages(m_ret, oind, fl, order);
9995c1f2cc4SAlan Cox 	/* Return excess pages to the free lists. */
10005c1f2cc4SAlan Cox 	npages_end = roundup2(npages, 1 << imin(oind, order));
10015c1f2cc4SAlan Cox 	if (npages < npages_end)
10025c1f2cc4SAlan Cox 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
100311752d88SAlan Cox 	return (m_ret);
100411752d88SAlan Cox }
100511752d88SAlan Cox 
100611752d88SAlan Cox #ifdef DDB
100711752d88SAlan Cox /*
100811752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
100911752d88SAlan Cox  */
101011752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages)
101111752d88SAlan Cox {
101211752d88SAlan Cox 	struct vm_freelist *fl;
10137e226537SAttilio Rao 	int flind, oind, pind, dom;
101411752d88SAlan Cox 
10157e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
10167e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
101711752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
101811752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
101911752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
102011752d88SAlan Cox 			    "\n              ", flind);
102111752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
102211752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
102311752d88SAlan Cox 			db_printf("\n--            ");
102411752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
102511752d88SAlan Cox 				db_printf("-- --      ");
102611752d88SAlan Cox 			db_printf("--\n");
102711752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
102811752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
102911752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
103011752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
10317e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
103211752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
103311752d88SAlan Cox 				}
103411752d88SAlan Cox 				db_printf("\n");
103511752d88SAlan Cox 			}
103611752d88SAlan Cox 			db_printf("\n");
103711752d88SAlan Cox 		}
10387e226537SAttilio Rao 		db_printf("\n");
10397e226537SAttilio Rao 	}
104011752d88SAlan Cox }
104111752d88SAlan Cox #endif
1042