xref: /freebsd/sys/vm/vm_phys.c (revision 271f0f1219b6fa658a127c47a6d7cae321cc9be1)
111752d88SAlan Cox /*-
211752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
311752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
411752d88SAlan Cox  * All rights reserved.
511752d88SAlan Cox  *
611752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
711752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
811752d88SAlan Cox  *
911752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1011752d88SAlan Cox  * modification, are permitted provided that the following conditions
1111752d88SAlan Cox  * are met:
1211752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1311752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1411752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1611752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1711752d88SAlan Cox  *
1811752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1911752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2011752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2111752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2211752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2311752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2411752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2511752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2611752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2711752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
2811752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2911752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3011752d88SAlan Cox  */
3111752d88SAlan Cox 
32fbd80bd0SAlan Cox /*
33fbd80bd0SAlan Cox  *	Physical memory system implementation
34fbd80bd0SAlan Cox  *
35fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
36fbd80bd0SAlan Cox  * virtual memory system.
37fbd80bd0SAlan Cox  */
38fbd80bd0SAlan Cox 
3911752d88SAlan Cox #include <sys/cdefs.h>
4011752d88SAlan Cox __FBSDID("$FreeBSD$");
4111752d88SAlan Cox 
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
4711752d88SAlan Cox #include <sys/lock.h>
4811752d88SAlan Cox #include <sys/kernel.h>
4911752d88SAlan Cox #include <sys/malloc.h>
5011752d88SAlan Cox #include <sys/mutex.h>
517e226537SAttilio Rao #if MAXMEMDOM > 1
527e226537SAttilio Rao #include <sys/proc.h>
537e226537SAttilio Rao #endif
5411752d88SAlan Cox #include <sys/queue.h>
5538d6b2dcSRoger Pau Monné #include <sys/rwlock.h>
5611752d88SAlan Cox #include <sys/sbuf.h>
5711752d88SAlan Cox #include <sys/sysctl.h>
5838d6b2dcSRoger Pau Monné #include <sys/tree.h>
5911752d88SAlan Cox #include <sys/vmmeter.h>
6011752d88SAlan Cox 
6111752d88SAlan Cox #include <ddb/ddb.h>
6211752d88SAlan Cox 
6311752d88SAlan Cox #include <vm/vm.h>
6411752d88SAlan Cox #include <vm/vm_param.h>
6511752d88SAlan Cox #include <vm/vm_kern.h>
6611752d88SAlan Cox #include <vm/vm_object.h>
6711752d88SAlan Cox #include <vm/vm_page.h>
6811752d88SAlan Cox #include <vm/vm_phys.h>
6911752d88SAlan Cox 
70449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
71449c2e92SKonstantin Belousov     "Too many physsegs.");
7211752d88SAlan Cox 
73a3870a18SJohn Baldwin struct mem_affinity *mem_affinity;
74a3870a18SJohn Baldwin 
757e226537SAttilio Rao int vm_ndomains = 1;
767e226537SAttilio Rao 
77449c2e92SKonstantin Belousov struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
78449c2e92SKonstantin Belousov int vm_phys_nsegs;
7911752d88SAlan Cox 
8038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg;
8138d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
8238d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *);
8338d6b2dcSRoger Pau Monné 
8438d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
8538d6b2dcSRoger Pau Monné     RB_INITIALIZER(_vm_phys_fictitious_tree);
8638d6b2dcSRoger Pau Monné 
8738d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg {
8838d6b2dcSRoger Pau Monné 	RB_ENTRY(vm_phys_fictitious_seg) node;
8938d6b2dcSRoger Pau Monné 	/* Memory region data */
90b6de32bdSKonstantin Belousov 	vm_paddr_t	start;
91b6de32bdSKonstantin Belousov 	vm_paddr_t	end;
92b6de32bdSKonstantin Belousov 	vm_page_t	first_page;
9338d6b2dcSRoger Pau Monné };
9438d6b2dcSRoger Pau Monné 
9538d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
9638d6b2dcSRoger Pau Monné     vm_phys_fictitious_cmp);
9738d6b2dcSRoger Pau Monné 
9838d6b2dcSRoger Pau Monné static struct rwlock vm_phys_fictitious_reg_lock;
99c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
100b6de32bdSKonstantin Belousov 
10111752d88SAlan Cox static struct vm_freelist
1027e226537SAttilio Rao     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
10311752d88SAlan Cox 
10411752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
10511752d88SAlan Cox 
10611752d88SAlan Cox static int cnt_prezero;
10711752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
10811752d88SAlan Cox     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
10911752d88SAlan Cox 
11011752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
11111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
11211752d88SAlan Cox     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
11311752d88SAlan Cox 
11411752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
11511752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
11611752d88SAlan Cox     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
11711752d88SAlan Cox 
1187e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
1197e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
120a3870a18SJohn Baldwin 
121f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
122f5c4b077SJohn Baldwin     int order);
123a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
124a3870a18SJohn Baldwin     int domain);
12511752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
12611752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa);
12711752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
12811752d88SAlan Cox     int order);
12911752d88SAlan Cox 
13038d6b2dcSRoger Pau Monné /*
13138d6b2dcSRoger Pau Monné  * Red-black tree helpers for vm fictitious range management.
13238d6b2dcSRoger Pau Monné  */
/*
 * Compare a single-page search key "p" (p->start holds the physical
 * address, p->end == 0 marks it as a key rather than a range) against a
 * registered fictitious range.  Returns -1/0/1 so that RB_FIND() can
 * walk the fictitious-segment tree.
 *
 * NOTE(review): the KASSERT implies no registered range may begin or
 * end at physical address 0 — confirm against the registration path.
 */
13338d6b2dcSRoger Pau Monné static inline int
13438d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
13538d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *range)
13638d6b2dcSRoger Pau Monné {
13738d6b2dcSRoger Pau Monné 
13838d6b2dcSRoger Pau Monné 	KASSERT(range->start != 0 && range->end != 0,
13938d6b2dcSRoger Pau Monné 	    ("Invalid range passed on search for vm_fictitious page"));
	/* The page lies at or beyond the range's end: descend right. */
14038d6b2dcSRoger Pau Monné 	if (p->start >= range->end)
14138d6b2dcSRoger Pau Monné 		return (1);
	/* The page lies before the range's start: descend left. */
14238d6b2dcSRoger Pau Monné 	if (p->start < range->start)
14338d6b2dcSRoger Pau Monné 		return (-1);
14438d6b2dcSRoger Pau Monné 
	/* The page falls inside this registered range. */
14538d6b2dcSRoger Pau Monné 	return (0);
14638d6b2dcSRoger Pau Monné }
14738d6b2dcSRoger Pau Monné 
/*
 * RB-tree comparison routine for fictitious segments.  Handles two
 * cases: a page lookup (first argument is a key with end == 0, see
 * vm_phys_fictitious_in_range()) and the insertion of a new range,
 * where both arguments are full [start, end) ranges that must not
 * overlap.  Overlap is a programming error and panics.
 */
14838d6b2dcSRoger Pau Monné static int
14938d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
15038d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *p2)
15138d6b2dcSRoger Pau Monné {
15238d6b2dcSRoger Pau Monné 
15338d6b2dcSRoger Pau Monné 	/* Check if this is a search for a page */
15438d6b2dcSRoger Pau Monné 	if (p1->end == 0)
15538d6b2dcSRoger Pau Monné 		return (vm_phys_fictitious_in_range(p1, p2));
15638d6b2dcSRoger Pau Monné 
15738d6b2dcSRoger Pau Monné 	KASSERT(p2->end != 0,
15838d6b2dcSRoger Pau Monné     ("Invalid range passed as second parameter to vm fictitious comparison"));
15938d6b2dcSRoger Pau Monné 
16038d6b2dcSRoger Pau Monné 	/* Searching to add a new range */
16138d6b2dcSRoger Pau Monné 	if (p1->end <= p2->start)
16238d6b2dcSRoger Pau Monné 		return (-1);
16338d6b2dcSRoger Pau Monné 	if (p1->start >= p2->end)
16438d6b2dcSRoger Pau Monné 		return (1);
16538d6b2dcSRoger Pau Monné 
	/* Neither disjoint ordering holds, so the ranges overlap. */
16638d6b2dcSRoger Pau Monné 	panic("Trying to add overlapping vm fictitious ranges:\n"
16738d6b2dcSRoger Pau Monné 	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
16838d6b2dcSRoger Pau Monné 	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
16938d6b2dcSRoger Pau Monné }
17038d6b2dcSRoger Pau Monné 
/*
 * Select the next memory domain for the current thread in round-robin
 * order.  The rotation index lives in the thread structure, so no
 * locking is required.  Collapses to domain 0 on single-domain kernels.
 */
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;
	td->td_dom_rr_idx = (td->td_dom_rr_idx + 1) % vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
1867e226537SAttilio Rao 
187449c2e92SKonstantin Belousov boolean_t
188449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
189449c2e92SKonstantin Belousov {
190449c2e92SKonstantin Belousov 	struct vm_phys_seg *s;
191449c2e92SKonstantin Belousov 	int idx;
192449c2e92SKonstantin Belousov 
193449c2e92SKonstantin Belousov 	while ((idx = ffsl(mask)) != 0) {
194449c2e92SKonstantin Belousov 		idx--;	/* ffsl counts from 1 */
195449c2e92SKonstantin Belousov 		mask &= ~(1UL << idx);
196449c2e92SKonstantin Belousov 		s = &vm_phys_segs[idx];
197449c2e92SKonstantin Belousov 		if (low < s->end && high > s->start)
198449c2e92SKonstantin Belousov 			return (TRUE);
199449c2e92SKonstantin Belousov 	}
200449c2e92SKonstantin Belousov 	return (FALSE);
201449c2e92SKonstantin Belousov }
202449c2e92SKonstantin Belousov 
20311752d88SAlan Cox /*
20411752d88SAlan Cox  * Outputs the state of the physical memory allocator, specifically,
20511752d88SAlan Cox  * the amount of physical memory in each free list.
20611752d88SAlan Cox  */
/*
 * Sysctl handler for vm.phys_free: renders, per domain and per free
 * list, a table of free-page counts indexed by buddy order (rows) and
 * free pool (columns).  The output text format is part of the visible
 * sysctl interface and must not change.
 */
20711752d88SAlan Cox static int
20811752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
20911752d88SAlan Cox {
21011752d88SAlan Cox 	struct sbuf sbuf;
21111752d88SAlan Cox 	struct vm_freelist *fl;
2127e226537SAttilio Rao 	int dom, error, flind, oind, pind;
21311752d88SAlan Cox 
	/* Wire the user buffer before building output, per sysctl(9). */
21400f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
21500f0e671SMatthew D Fleming 	if (error != 0)
21600f0e671SMatthew D Fleming 		return (error);
2177e226537SAttilio Rao 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
2187e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
219eb2f42fbSAlan Cox 		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
22011752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
221eb2f42fbSAlan Cox 			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
22211752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
22311752d88SAlan Cox 			    "\n              ", flind);
22411752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
22511752d88SAlan Cox 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
22611752d88SAlan Cox 			sbuf_printf(&sbuf, "\n--            ");
22711752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
22811752d88SAlan Cox 				sbuf_printf(&sbuf, "-- --      ");
22911752d88SAlan Cox 			sbuf_printf(&sbuf, "--\n");
			/* One row per buddy order, largest first. */
23011752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				/* Row label: order and its size in KiB. */
231d689bc00SAlan Cox 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
23211752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
23311752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
2347e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
235eb2f42fbSAlan Cox 					sbuf_printf(&sbuf, "  |  %6d",
2367e226537SAttilio Rao 					    fl[oind].lcnt);
23711752d88SAlan Cox 				}
23811752d88SAlan Cox 				sbuf_printf(&sbuf, "\n");
23911752d88SAlan Cox 			}
2407e226537SAttilio Rao 		}
24111752d88SAlan Cox 	}
2424e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
24311752d88SAlan Cox 	sbuf_delete(&sbuf);
24411752d88SAlan Cox 	return (error);
24511752d88SAlan Cox }
24611752d88SAlan Cox 
24711752d88SAlan Cox /*
24811752d88SAlan Cox  * Outputs the set of physical memory segments.
24911752d88SAlan Cox  */
/*
 * Sysctl handler for vm.phys_segs: dumps every physical memory segment
 * (start/end addresses, NUMA domain, and free-queue pointer) into the
 * request buffer.  The text format is part of the sysctl interface.
 */
25011752d88SAlan Cox static int
25111752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
25211752d88SAlan Cox {
25311752d88SAlan Cox 	struct sbuf sbuf;
25411752d88SAlan Cox 	struct vm_phys_seg *seg;
25511752d88SAlan Cox 	int error, segind;
25611752d88SAlan Cox 
	/* Wire the user buffer before building output, per sysctl(9). */
25700f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
25800f0e671SMatthew D Fleming 	if (error != 0)
25900f0e671SMatthew D Fleming 		return (error);
2604e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
26111752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
26211752d88SAlan Cox 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
26311752d88SAlan Cox 		seg = &vm_phys_segs[segind];
26411752d88SAlan Cox 		sbuf_printf(&sbuf, "start:     %#jx\n",
26511752d88SAlan Cox 		    (uintmax_t)seg->start);
26611752d88SAlan Cox 		sbuf_printf(&sbuf, "end:       %#jx\n",
26711752d88SAlan Cox 		    (uintmax_t)seg->end);
268a3870a18SJohn Baldwin 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
26911752d88SAlan Cox 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
27011752d88SAlan Cox 	}
2714e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
27211752d88SAlan Cox 	sbuf_delete(&sbuf);
27311752d88SAlan Cox 	return (error);
27411752d88SAlan Cox }
27511752d88SAlan Cox 
2767e226537SAttilio Rao static void
2777e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
278a3870a18SJohn Baldwin {
279a3870a18SJohn Baldwin 
2807e226537SAttilio Rao 	m->order = order;
2817e226537SAttilio Rao 	if (tail)
282c325e866SKonstantin Belousov 		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
2837e226537SAttilio Rao 	else
284c325e866SKonstantin Belousov 		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
2857e226537SAttilio Rao 	fl[order].lcnt++;
286a3870a18SJohn Baldwin }
2877e226537SAttilio Rao 
2887e226537SAttilio Rao static void
2897e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
2907e226537SAttilio Rao {
2917e226537SAttilio Rao 
292c325e866SKonstantin Belousov 	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
2937e226537SAttilio Rao 	fl[order].lcnt--;
2947e226537SAttilio Rao 	m->order = VM_NFREEORDER;
295a3870a18SJohn Baldwin }
296a3870a18SJohn Baldwin 
29711752d88SAlan Cox /*
29811752d88SAlan Cox  * Create a physical memory segment.
29911752d88SAlan Cox  */
/*
 * Create a physical memory segment covering [start, end) on the given
 * free list and NUMA domain, keeping vm_phys_segs[] sorted by start
 * address.
 */
30011752d88SAlan Cox static void
301a3870a18SJohn Baldwin _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
30211752d88SAlan Cox {
30311752d88SAlan Cox 	struct vm_phys_seg *seg;
30411752d88SAlan Cox 
30511752d88SAlan Cox 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
30611752d88SAlan Cox 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
3077e226537SAttilio Rao 	KASSERT(domain < vm_ndomains,
3087e226537SAttilio Rao 	    ("vm_phys_create_seg: invalid domain provided"));
	/*
	 * Insertion sort: claim the next array slot, then shift any
	 * existing segments that start at or above "end" one slot up
	 * until the correct position for the new segment is open.
	 */
30911752d88SAlan Cox 	seg = &vm_phys_segs[vm_phys_nsegs++];
310*271f0f12SAlan Cox 	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
311*271f0f12SAlan Cox 		*seg = *(seg - 1);
312*271f0f12SAlan Cox 		seg--;
313*271f0f12SAlan Cox 	}
31411752d88SAlan Cox 	seg->start = start;
31511752d88SAlan Cox 	seg->end = end;
316a3870a18SJohn Baldwin 	seg->domain = domain;
3177e226537SAttilio Rao 	seg->free_queues = &vm_phys_free_queues[domain][flind];
31811752d88SAlan Cox }
31911752d88SAlan Cox 
/*
 * Create segment(s) for [start, end) on free list "flind".  Without
 * NUMA affinity data everything lands in domain 0; otherwise the range
 * is split at each affinity boundary so that every resulting segment
 * lies entirely within one domain.
 *
 * NOTE(review): the panic checks imply mem_affinity[] is sorted by
 * address, gap-free over the range, and terminated by an entry with
 * end == 0 — confirm against the platform code that fills it in.
 */
320a3870a18SJohn Baldwin static void
321a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
322a3870a18SJohn Baldwin {
323a3870a18SJohn Baldwin 	int i;
324a3870a18SJohn Baldwin 
325a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
326a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, end, flind, 0);
327a3870a18SJohn Baldwin 		return;
328a3870a18SJohn Baldwin 	}
329a3870a18SJohn Baldwin 
330a3870a18SJohn Baldwin 	for (i = 0;; i++) {
331a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
332a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
		/* Skip affinity entries entirely below the range. */
333a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
334a3870a18SJohn Baldwin 			continue;
335a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
336a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
337a3870a18SJohn Baldwin 			    (uintmax_t)start);
		/* Entry covers the remainder: emit it and stop. */
338a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
339a3870a18SJohn Baldwin 			_vm_phys_create_seg(start, end, flind,
340a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
341a3870a18SJohn Baldwin 			break;
342a3870a18SJohn Baldwin 		}
		/* Emit the piece inside this entry and continue after it. */
343a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
344a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
345a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
346a3870a18SJohn Baldwin 	}
347a3870a18SJohn Baldwin }
348a3870a18SJohn Baldwin 
34911752d88SAlan Cox /*
350*271f0f12SAlan Cox  * Add a physical memory segment.
351*271f0f12SAlan Cox  */
352*271f0f12SAlan Cox void
353*271f0f12SAlan Cox vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
354*271f0f12SAlan Cox {
355*271f0f12SAlan Cox 
356*271f0f12SAlan Cox 	KASSERT((start & PAGE_MASK) == 0,
357*271f0f12SAlan Cox 	    ("vm_phys_define_seg: start is not page aligned"));
358*271f0f12SAlan Cox 	KASSERT((end & PAGE_MASK) == 0,
359*271f0f12SAlan Cox 	    ("vm_phys_define_seg: end is not page aligned"));
360*271f0f12SAlan Cox #ifdef	VM_FREELIST_ISADMA
361*271f0f12SAlan Cox 	if (start < 16777216) {
362*271f0f12SAlan Cox 		if (end > 16777216) {
363*271f0f12SAlan Cox 			vm_phys_create_seg(start, 16777216,
364*271f0f12SAlan Cox 			    VM_FREELIST_ISADMA);
365*271f0f12SAlan Cox 			vm_phys_create_seg(16777216, end, VM_FREELIST_DEFAULT);
366*271f0f12SAlan Cox 		} else
367*271f0f12SAlan Cox 			vm_phys_create_seg(start, end, VM_FREELIST_ISADMA);
368*271f0f12SAlan Cox 		if (VM_FREELIST_ISADMA >= vm_nfreelists)
369*271f0f12SAlan Cox 			vm_nfreelists = VM_FREELIST_ISADMA + 1;
370*271f0f12SAlan Cox 	} else
371*271f0f12SAlan Cox #endif
372*271f0f12SAlan Cox #ifdef	VM_FREELIST_HIGHMEM
373*271f0f12SAlan Cox 	if (end > VM_HIGHMEM_ADDRESS) {
374*271f0f12SAlan Cox 		if (start < VM_HIGHMEM_ADDRESS) {
375*271f0f12SAlan Cox 			vm_phys_create_seg(start, VM_HIGHMEM_ADDRESS,
376*271f0f12SAlan Cox 			    VM_FREELIST_DEFAULT);
377*271f0f12SAlan Cox 			vm_phys_create_seg(VM_HIGHMEM_ADDRESS, end,
378*271f0f12SAlan Cox 			    VM_FREELIST_HIGHMEM);
379*271f0f12SAlan Cox 		} else
380*271f0f12SAlan Cox 			vm_phys_create_seg(start, end, VM_FREELIST_HIGHMEM);
381*271f0f12SAlan Cox 		if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
382*271f0f12SAlan Cox 			vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
383*271f0f12SAlan Cox 	} else
384*271f0f12SAlan Cox #endif
385*271f0f12SAlan Cox 	vm_phys_create_seg(start, end, VM_FREELIST_DEFAULT);
386*271f0f12SAlan Cox }
387*271f0f12SAlan Cox 
388*271f0f12SAlan Cox /*
38911752d88SAlan Cox  * Initialize the physical memory allocator.
39011752d88SAlan Cox  */
/*
 * Initialize the physical memory allocator: bind each registered
 * segment to its slice of vm_page_array, initialize every free queue
 * for every (domain, free list, pool, order) tuple, and set up the
 * lock protecting the fictitious-segment tree.
 */
39111752d88SAlan Cox void
39211752d88SAlan Cox vm_phys_init(void)
39311752d88SAlan Cox {
39411752d88SAlan Cox 	struct vm_freelist *fl;
395*271f0f12SAlan Cox 	struct vm_phys_seg *seg;
396*271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE
397*271f0f12SAlan Cox 	long pages;
398*271f0f12SAlan Cox #endif
399*271f0f12SAlan Cox 	int dom, flind, oind, pind, segind;
40011752d88SAlan Cox 
401*271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE
402*271f0f12SAlan Cox 	pages = 0;
40311752d88SAlan Cox #endif
404*271f0f12SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
405*271f0f12SAlan Cox 		seg = &vm_phys_segs[segind];
	/*
	 * SPARSE: segments occupy consecutive runs of vm_page_array, so
	 * track a running page offset.  DENSE: the array maps physical
	 * addresses directly, so translate the segment's start address.
	 */
406*271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE
407*271f0f12SAlan Cox 		seg->first_page = &vm_page_array[pages];
408*271f0f12SAlan Cox 		pages += atop(seg->end - seg->start);
409*271f0f12SAlan Cox #else
410*271f0f12SAlan Cox 		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
41111752d88SAlan Cox #endif
41211752d88SAlan Cox 	}
	/* Empty-initialize every buddy queue in every domain and pool. */
4137e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
41411752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
41511752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
4167e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
41711752d88SAlan Cox 				for (oind = 0; oind < VM_NFREEORDER; oind++)
41811752d88SAlan Cox 					TAILQ_INIT(&fl[oind].pl);
41911752d88SAlan Cox 			}
42011752d88SAlan Cox 		}
421a3870a18SJohn Baldwin 	}
42238d6b2dcSRoger Pau Monné 	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
42311752d88SAlan Cox }
42411752d88SAlan Cox 
42511752d88SAlan Cox /*
42611752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
42711752d88SAlan Cox  */
42811752d88SAlan Cox static __inline void
42911752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
43011752d88SAlan Cox {
43111752d88SAlan Cox 	vm_page_t m_buddy;
43211752d88SAlan Cox 
43311752d88SAlan Cox 	while (oind > order) {
43411752d88SAlan Cox 		oind--;
43511752d88SAlan Cox 		m_buddy = &m[1 << oind];
43611752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
43711752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
43811752d88SAlan Cox 		    m_buddy, m_buddy->order));
4397e226537SAttilio Rao 		vm_freelist_add(fl, m_buddy, oind, 0);
44011752d88SAlan Cox         }
44111752d88SAlan Cox }
44211752d88SAlan Cox 
44311752d88SAlan Cox /*
44411752d88SAlan Cox  * Initialize a physical page and add it to the free lists.
44511752d88SAlan Cox  */
/*
 * Initialize a physical page and add it to the free lists, updating
 * the global and per-domain page accounting.  The free-page mutex is
 * taken only around the counter adjustment and the actual free.
 */
44611752d88SAlan Cox void
44711752d88SAlan Cox vm_phys_add_page(vm_paddr_t pa)
44811752d88SAlan Cox {
44911752d88SAlan Cox 	vm_page_t m;
450449c2e92SKonstantin Belousov 	struct vm_domain *vmd;
45111752d88SAlan Cox 
45244f1c916SBryan Drewery 	vm_cnt.v_page_count++;
45311752d88SAlan Cox 	m = vm_phys_paddr_to_vm_page(pa);
45411752d88SAlan Cox 	m->phys_addr = pa;
45544e46b9eSAlan Cox 	m->queue = PQ_NONE;
45611752d88SAlan Cox 	m->segind = vm_phys_paddr_to_segind(pa);
	/* Account the page to its domain and record its segment bit. */
457449c2e92SKonstantin Belousov 	vmd = vm_phys_domain(m);
458449c2e92SKonstantin Belousov 	vmd->vmd_page_count++;
459449c2e92SKonstantin Belousov 	vmd->vmd_segs |= 1UL << m->segind;
46011752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
46111752d88SAlan Cox 	    ("vm_phys_add_page: page %p has unexpected order %d",
46211752d88SAlan Cox 	    m, m->order));
46311752d88SAlan Cox 	m->pool = VM_FREEPOOL_DEFAULT;
46411752d88SAlan Cox 	pmap_page_init(m);
4658941dc44SAlan Cox 	mtx_lock(&vm_page_queue_free_mtx);
466449c2e92SKonstantin Belousov 	vm_phys_freecnt_adj(m, 1);
46711752d88SAlan Cox 	vm_phys_free_pages(m, 0);
4688941dc44SAlan Cox 	mtx_unlock(&vm_page_queue_free_mtx);
46911752d88SAlan Cox }
47011752d88SAlan Cox 
47111752d88SAlan Cox /*
47211752d88SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages
47311752d88SAlan Cox  * from the free lists.
4748941dc44SAlan Cox  *
4758941dc44SAlan Cox  * The free page queues must be locked.
47611752d88SAlan Cox  */
47711752d88SAlan Cox vm_page_t
47811752d88SAlan Cox vm_phys_alloc_pages(int pool, int order)
47911752d88SAlan Cox {
48049ca10d4SJayachandran C. 	vm_page_t m;
4817e226537SAttilio Rao 	int dom, domain, flind;
48249ca10d4SJayachandran C. 
483f5c4b077SJohn Baldwin 	KASSERT(pool < VM_NFREEPOOL,
484f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
485f5c4b077SJohn Baldwin 	KASSERT(order < VM_NFREEORDER,
486f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: order %d is out of range", order));
487f5c4b077SJohn Baldwin 
4887e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4897e226537SAttilio Rao 		domain = vm_rr_selectdomain();
49049ca10d4SJayachandran C. 		for (flind = 0; flind < vm_nfreelists; flind++) {
4917e226537SAttilio Rao 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
4927e226537SAttilio Rao 			    order);
49349ca10d4SJayachandran C. 			if (m != NULL)
49449ca10d4SJayachandran C. 				return (m);
49549ca10d4SJayachandran C. 		}
4967e226537SAttilio Rao 	}
49749ca10d4SJayachandran C. 	return (NULL);
49849ca10d4SJayachandran C. }
49949ca10d4SJayachandran C. 
50049ca10d4SJayachandran C. /*
50149ca10d4SJayachandran C.  * Find and dequeue a free page on the given free list, with the
50249ca10d4SJayachandran C.  * specified pool and order
50349ca10d4SJayachandran C.  */
50449ca10d4SJayachandran C. vm_page_t
50549ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order)
50649ca10d4SJayachandran C. {
50711752d88SAlan Cox 	vm_page_t m;
5087e226537SAttilio Rao 	int dom, domain;
50911752d88SAlan Cox 
51049ca10d4SJayachandran C. 	KASSERT(flind < VM_NFREELIST,
51149ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
51211752d88SAlan Cox 	KASSERT(pool < VM_NFREEPOOL,
51349ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
51411752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
51549ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
516a3870a18SJohn Baldwin 
5177e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
5187e226537SAttilio Rao 		domain = vm_rr_selectdomain();
5197e226537SAttilio Rao 		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
520f5c4b077SJohn Baldwin 		if (m != NULL)
521f5c4b077SJohn Baldwin 			return (m);
5227e226537SAttilio Rao 	}
5237e226537SAttilio Rao 	return (NULL);
524f5c4b077SJohn Baldwin }
525f5c4b077SJohn Baldwin 
/*
 * Allocate a 2^order run from the given domain and free list.  First
 * search the requested pool from the requested order upward; on
 * failure, steal the largest available run from any other pool,
 * relabel it, and split it down to the requested order.  Returns NULL
 * if the free list is exhausted.
 *
 * The free page queues must be locked.
 */
526f5c4b077SJohn Baldwin static vm_page_t
527f5c4b077SJohn Baldwin vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
528f5c4b077SJohn Baldwin {
529f5c4b077SJohn Baldwin 	struct vm_freelist *fl;
530f5c4b077SJohn Baldwin 	struct vm_freelist *alt;
531f5c4b077SJohn Baldwin 	int oind, pind;
532f5c4b077SJohn Baldwin 	vm_page_t m;
533f5c4b077SJohn Baldwin 
53411752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
5357e226537SAttilio Rao 	fl = &vm_phys_free_queues[domain][flind][pool][0];
	/* First choice: the requested pool, smallest sufficient order. */
53611752d88SAlan Cox 	for (oind = order; oind < VM_NFREEORDER; oind++) {
53711752d88SAlan Cox 		m = TAILQ_FIRST(&fl[oind].pl);
53811752d88SAlan Cox 		if (m != NULL) {
5397e226537SAttilio Rao 			vm_freelist_rem(fl, m, oind);
			/* Return any excess above 2^order to "fl". */
54011752d88SAlan Cox 			vm_phys_split_pages(m, oind, fl, order);
54111752d88SAlan Cox 			return (m);
54211752d88SAlan Cox 		}
54311752d88SAlan Cox 	}
54411752d88SAlan Cox 
54511752d88SAlan Cox 	/*
54611752d88SAlan Cox 	 * The given pool was empty.  Find the largest
54711752d88SAlan Cox 	 * contiguous, power-of-two-sized set of pages in any
54811752d88SAlan Cox 	 * pool.  Transfer these pages to the given pool, and
54911752d88SAlan Cox 	 * use them to satisfy the allocation.
55011752d88SAlan Cox 	 */
55111752d88SAlan Cox 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
55211752d88SAlan Cox 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
5537e226537SAttilio Rao 			alt = &vm_phys_free_queues[domain][flind][pind][0];
55411752d88SAlan Cox 			m = TAILQ_FIRST(&alt[oind].pl);
55511752d88SAlan Cox 			if (m != NULL) {
5567e226537SAttilio Rao 				vm_freelist_rem(alt, m, oind);
				/* Relabel the stolen run, then split. */
55711752d88SAlan Cox 				vm_phys_set_pool(pool, m, oind);
55811752d88SAlan Cox 				vm_phys_split_pages(m, oind, fl, order);
55911752d88SAlan Cox 				return (m);
56011752d88SAlan Cox 			}
56111752d88SAlan Cox 		}
56211752d88SAlan Cox 	}
56311752d88SAlan Cox 	return (NULL);
56411752d88SAlan Cox }
56511752d88SAlan Cox 
56611752d88SAlan Cox /*
56711752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
56811752d88SAlan Cox  */
56911752d88SAlan Cox vm_page_t
57011752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
57111752d88SAlan Cox {
57211752d88SAlan Cox 	struct vm_phys_seg *seg;
57311752d88SAlan Cox 	int segind;
57411752d88SAlan Cox 
57511752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
57611752d88SAlan Cox 		seg = &vm_phys_segs[segind];
57711752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
57811752d88SAlan Cox 			return (&seg->first_page[atop(pa - seg->start)]);
57911752d88SAlan Cox 	}
580f06a3a36SAndrew Thompson 	return (NULL);
58111752d88SAlan Cox }
58211752d88SAlan Cox 
/*
 * Look up the fictitious vm_page for physical address "pa" in the
 * registered-range tree, or return NULL if the address lies in no
 * registered fictitious range.  The tree is searched under the
 * registration rwlock (read mode); the key's end == 0 selects the
 * page-lookup branch of vm_phys_fictitious_cmp().
 */
583b6de32bdSKonstantin Belousov vm_page_t
584b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
585b6de32bdSKonstantin Belousov {
58638d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg tmp, *seg;
587b6de32bdSKonstantin Belousov 	vm_page_t m;
588b6de32bdSKonstantin Belousov 
589b6de32bdSKonstantin Belousov 	m = NULL;
59038d6b2dcSRoger Pau Monné 	tmp.start = pa;
59138d6b2dcSRoger Pau Monné 	tmp.end = 0;
59238d6b2dcSRoger Pau Monné 
59338d6b2dcSRoger Pau Monné 	rw_rlock(&vm_phys_fictitious_reg_lock);
59438d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
59538d6b2dcSRoger Pau Monné 	rw_runlock(&vm_phys_fictitious_reg_lock);
59638d6b2dcSRoger Pau Monné 	if (seg == NULL)
59738d6b2dcSRoger Pau Monné 		return (NULL);
59838d6b2dcSRoger Pau Monné 
599b6de32bdSKonstantin Belousov 	m = &seg->first_page[atop(pa - seg->start)];
60038d6b2dcSRoger Pau Monné 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
60138d6b2dcSRoger Pau Monné 
602b6de32bdSKonstantin Belousov 	return (m);
603b6de32bdSKonstantin Belousov }
604b6de32bdSKonstantin Belousov 
6055ebe728dSRoger Pau Monné static inline void
6065ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
6075ebe728dSRoger Pau Monné     long page_count, vm_memattr_t memattr)
6085ebe728dSRoger Pau Monné {
6095ebe728dSRoger Pau Monné 	long i;
6105ebe728dSRoger Pau Monné 
6115ebe728dSRoger Pau Monné 	for (i = 0; i < page_count; i++) {
6125ebe728dSRoger Pau Monné 		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
6135ebe728dSRoger Pau Monné 		range[i].oflags &= ~VPO_UNMANAGED;
6145ebe728dSRoger Pau Monné 		range[i].busy_lock = VPB_UNBUSIED;
6155ebe728dSRoger Pau Monné 	}
6165ebe728dSRoger Pau Monné }
6175ebe728dSRoger Pau Monné 
/*
 * Register the fictitious physical page range [start, end) with memory
 * attribute "memattr".
 *
 * With VM_PHYSSEG_DENSE, any portion of the range that falls inside
 * vm_page_array reuses those page structures in place; only the portion
 * outside vm_page_array gets freshly allocated pages and an entry in the
 * fictitious-segment tree.  Returns 0 on success, or EINVAL when the
 * range extends both before and after vm_page_array (which cannot be
 * represented by a single tree entry plus an array prefix/suffix).
 *
 * May sleep (M_WAITOK allocations); must not be called from a context
 * that cannot sleep.
 */
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;		/* page indices of start and end */
	long dpage_count;	/* pages serviced from vm_page_array */
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			/* Shrink the range to the part needing allocation. */
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		/* Shrink the range to the part needing allocation. */
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	/* Record the (possibly trimmed) range in the lookup tree. */
	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}
702b6de32bdSKonstantin Belousov 
703b6de32bdSKonstantin Belousov void
704b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
705b6de32bdSKonstantin Belousov {
70638d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg *seg, tmp;
707b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
7085ebe728dSRoger Pau Monné 	long pi, pe;
709b6de32bdSKonstantin Belousov #endif
710b6de32bdSKonstantin Belousov 
7115ebe728dSRoger Pau Monné 	KASSERT(start < end,
7125ebe728dSRoger Pau Monné 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
7135ebe728dSRoger Pau Monné 	    (uintmax_t)start, (uintmax_t)end));
7145ebe728dSRoger Pau Monné 
715b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
716b6de32bdSKonstantin Belousov 	pi = atop(start);
7175ebe728dSRoger Pau Monné 	pe = atop(end);
7185ebe728dSRoger Pau Monné 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
7195ebe728dSRoger Pau Monné 		if ((pe - first_page) <= vm_page_array_size) {
7205ebe728dSRoger Pau Monné 			/*
7215ebe728dSRoger Pau Monné 			 * This segment was allocated using vm_page_array
7225ebe728dSRoger Pau Monné 			 * only, there's nothing to do since those pages
7235ebe728dSRoger Pau Monné 			 * were never added to the tree.
7245ebe728dSRoger Pau Monné 			 */
7255ebe728dSRoger Pau Monné 			return;
7265ebe728dSRoger Pau Monné 		}
7275ebe728dSRoger Pau Monné 		/*
7285ebe728dSRoger Pau Monné 		 * We have a segment that starts inside
7295ebe728dSRoger Pau Monné 		 * of vm_page_array, but ends outside of it.
7305ebe728dSRoger Pau Monné 		 *
7315ebe728dSRoger Pau Monné 		 * Calculate how many pages were added to the
7325ebe728dSRoger Pau Monné 		 * tree and free them.
7335ebe728dSRoger Pau Monné 		 */
7345ebe728dSRoger Pau Monné 		start = ptoa(first_page + vm_page_array_size);
7355ebe728dSRoger Pau Monné 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
7365ebe728dSRoger Pau Monné 		/*
7375ebe728dSRoger Pau Monné 		 * We have a segment that ends inside of vm_page_array,
7385ebe728dSRoger Pau Monné 		 * but starts outside of it.
7395ebe728dSRoger Pau Monné 		 */
7405ebe728dSRoger Pau Monné 		end = ptoa(first_page);
7415ebe728dSRoger Pau Monné 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
7425ebe728dSRoger Pau Monné 		/* Since it's not possible to register such a range, panic. */
7435ebe728dSRoger Pau Monné 		panic(
7445ebe728dSRoger Pau Monné 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
7455ebe728dSRoger Pau Monné 		    (uintmax_t)start, (uintmax_t)end);
7465ebe728dSRoger Pau Monné 	}
747b6de32bdSKonstantin Belousov #endif
74838d6b2dcSRoger Pau Monné 	tmp.start = start;
74938d6b2dcSRoger Pau Monné 	tmp.end = 0;
750b6de32bdSKonstantin Belousov 
75138d6b2dcSRoger Pau Monné 	rw_wlock(&vm_phys_fictitious_reg_lock);
75238d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
75338d6b2dcSRoger Pau Monné 	if (seg->start != start || seg->end != end) {
75438d6b2dcSRoger Pau Monné 		rw_wunlock(&vm_phys_fictitious_reg_lock);
75538d6b2dcSRoger Pau Monné 		panic(
75638d6b2dcSRoger Pau Monné 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
75738d6b2dcSRoger Pau Monné 		    (uintmax_t)start, (uintmax_t)end);
75838d6b2dcSRoger Pau Monné 	}
75938d6b2dcSRoger Pau Monné 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
76038d6b2dcSRoger Pau Monné 	rw_wunlock(&vm_phys_fictitious_reg_lock);
76138d6b2dcSRoger Pau Monné 	free(seg->first_page, M_FICT_PAGES);
76238d6b2dcSRoger Pau Monné 	free(seg, M_FICT_PAGES);
763b6de32bdSKonstantin Belousov }
764b6de32bdSKonstantin Belousov 
76511752d88SAlan Cox /*
76611752d88SAlan Cox  * Find the segment containing the given physical address.
76711752d88SAlan Cox  */
76811752d88SAlan Cox static int
76911752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa)
77011752d88SAlan Cox {
77111752d88SAlan Cox 	struct vm_phys_seg *seg;
77211752d88SAlan Cox 	int segind;
77311752d88SAlan Cox 
77411752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
77511752d88SAlan Cox 		seg = &vm_phys_segs[segind];
77611752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
77711752d88SAlan Cox 			return (segind);
77811752d88SAlan Cox 	}
77911752d88SAlan Cox 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
78011752d88SAlan Cox 	    (uintmax_t)pa);
78111752d88SAlan Cox }
78211752d88SAlan Cox 
78311752d88SAlan Cox /*
78411752d88SAlan Cox  * Free a contiguous, power of two-sized set of physical pages.
7858941dc44SAlan Cox  *
7868941dc44SAlan Cox  * The free page queues must be locked.
78711752d88SAlan Cox  */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	/* The pages being freed must not already be on a free list. */
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		/*
		 * Buddy coalescing: repeatedly merge the block being freed
		 * with its free buddy of the same order, doubling the block
		 * size each time, until the buddy is missing, busy, or the
		 * maximum order is reached.
		 */
		pa = VM_PAGE_TO_PHYS(m);
		do {
			/* The buddy's address differs only in bit (PAGE_SHIFT + order). */
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			/* The buddy must be free and of exactly this order. */
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			/* Round down to the start of the merged block. */
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	/* Place the (possibly coalesced) block on its pool's free list. */
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}
82711752d88SAlan Cox 
82811752d88SAlan Cox /*
8295c1f2cc4SAlan Cox  * Free a contiguous, arbitrarily sized set of physical pages.
8305c1f2cc4SAlan Cox  *
8315c1f2cc4SAlan Cox  * The free page queues must be locked.
8325c1f2cc4SAlan Cox  */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;	/* pages freed per iteration (a power of two) */
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		/* Free the largest power-of-two block that still fits. */
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
8685c1f2cc4SAlan Cox 
8695c1f2cc4SAlan Cox /*
87011752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
87111752d88SAlan Cox  */
8727bfda801SAlan Cox void
87311752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
87411752d88SAlan Cox {
87511752d88SAlan Cox 	vm_page_t m_tmp;
87611752d88SAlan Cox 
87711752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
87811752d88SAlan Cox 		m_tmp->pool = pool;
87911752d88SAlan Cox }
88011752d88SAlan Cox 
88111752d88SAlan Cox /*
8829742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
8839742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
8849742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
8857bfda801SAlan Cox  *
8867bfda801SAlan Cox  * The free page queues must be locked.
8877bfda801SAlan Cox  */
888e35395ceSAlan Cox boolean_t
8897bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
8907bfda801SAlan Cox {
8917bfda801SAlan Cox 	struct vm_freelist *fl;
8927bfda801SAlan Cox 	struct vm_phys_seg *seg;
8937bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
8947bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
8957bfda801SAlan Cox 	int order;
8967bfda801SAlan Cox 
8977bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
8987bfda801SAlan Cox 
8997bfda801SAlan Cox 	/*
9007bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
9017bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
9027bfda801SAlan Cox 	 * assign it to "m_set".
9037bfda801SAlan Cox 	 */
9047bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
9057bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
906bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
9077bfda801SAlan Cox 		order++;
9087bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
9092fbced65SAlan Cox 		if (pa >= seg->start)
9107bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
911e35395ceSAlan Cox 		else
912e35395ceSAlan Cox 			return (FALSE);
9137bfda801SAlan Cox 	}
914e35395ceSAlan Cox 	if (m_set->order < order)
915e35395ceSAlan Cox 		return (FALSE);
916e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
917e35395ceSAlan Cox 		return (FALSE);
9187bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
9197bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
9207bfda801SAlan Cox 	    m_set, m_set->order));
9217bfda801SAlan Cox 
9227bfda801SAlan Cox 	/*
9237bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
9247bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
9257bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
9267bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
9277bfda801SAlan Cox 	 */
9287bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
9297bfda801SAlan Cox 	order = m_set->order;
9307e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
9317bfda801SAlan Cox 	while (order > 0) {
9327bfda801SAlan Cox 		order--;
9337bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
9347bfda801SAlan Cox 		if (m->phys_addr < pa_half)
9357bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
9367bfda801SAlan Cox 		else {
9377bfda801SAlan Cox 			m_tmp = m_set;
9387bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
9397bfda801SAlan Cox 		}
9407e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
9417bfda801SAlan Cox 	}
9427bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
943e35395ceSAlan Cox 	return (TRUE);
9447bfda801SAlan Cox }
9457bfda801SAlan Cox 
9467bfda801SAlan Cox /*
9477bfda801SAlan Cox  * Try to zero one physical page.  Used by an idle priority thread.
94811752d88SAlan Cox  */
94911752d88SAlan Cox boolean_t
95011752d88SAlan Cox vm_phys_zero_pages_idle(void)
95111752d88SAlan Cox {
9527e226537SAttilio Rao 	static struct vm_freelist *fl;
9537bfda801SAlan Cox 	static int flind, oind, pind;
95411752d88SAlan Cox 	vm_page_t m, m_tmp;
9557e226537SAttilio Rao 	int domain;
95611752d88SAlan Cox 
9577e226537SAttilio Rao 	domain = vm_rr_selectdomain();
9587e226537SAttilio Rao 	fl = vm_phys_free_queues[domain][0][0];
95911752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
9607bfda801SAlan Cox 	for (;;) {
961c325e866SKonstantin Belousov 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
9627bfda801SAlan Cox 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
9637bfda801SAlan Cox 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
9647bfda801SAlan Cox 					vm_phys_unfree_page(m_tmp);
965449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, -1);
96611752d88SAlan Cox 					mtx_unlock(&vm_page_queue_free_mtx);
96711752d88SAlan Cox 					pmap_zero_page_idle(m_tmp);
96811752d88SAlan Cox 					m_tmp->flags |= PG_ZERO;
96911752d88SAlan Cox 					mtx_lock(&vm_page_queue_free_mtx);
970449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, 1);
9717bfda801SAlan Cox 					vm_phys_free_pages(m_tmp, 0);
9727bfda801SAlan Cox 					vm_page_zero_count++;
9737bfda801SAlan Cox 					cnt_prezero++;
97411752d88SAlan Cox 					return (TRUE);
97511752d88SAlan Cox 				}
97611752d88SAlan Cox 			}
97711752d88SAlan Cox 		}
9787bfda801SAlan Cox 		oind++;
9797bfda801SAlan Cox 		if (oind == VM_NFREEORDER) {
9807bfda801SAlan Cox 			oind = 0;
9817bfda801SAlan Cox 			pind++;
9827bfda801SAlan Cox 			if (pind == VM_NFREEPOOL) {
9837bfda801SAlan Cox 				pind = 0;
9847bfda801SAlan Cox 				flind++;
9857bfda801SAlan Cox 				if (flind == vm_nfreelists)
9867bfda801SAlan Cox 					flind = 0;
9877bfda801SAlan Cox 			}
9887e226537SAttilio Rao 			fl = vm_phys_free_queues[domain][flind][pind];
9897bfda801SAlan Cox 		}
9907bfda801SAlan Cox 	}
99111752d88SAlan Cox }
99211752d88SAlan Cox 
99311752d88SAlan Cox /*
9942f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
9952f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
9962f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
9972f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
9982f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
9992f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
10002f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
100111752d88SAlan Cox  * "alignment" and "boundary" must be a power of two.
100211752d88SAlan Cox  */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int dom, domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Try each memory domain in round-robin order before giving up. */
	dom = 0;
restartdom:
	domain = vm_rr_selectdomain();
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = &vm_phys_free_queues[domain][flind][pind][0];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	if (++dom < vm_ndomains)
		goto restartdom;
	return (NULL);
done:
	/* Remove every constituent block of the run from the free lists. */
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	/* Break the found block down to the requested order. */
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}
110211752d88SAlan Cox 
110311752d88SAlan Cox #ifdef DDB
110411752d88SAlan Cox /*
110511752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
110611752d88SAlan Cox  */
110711752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages)
110811752d88SAlan Cox {
110911752d88SAlan Cox 	struct vm_freelist *fl;
11107e226537SAttilio Rao 	int flind, oind, pind, dom;
111111752d88SAlan Cox 
11127e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
11137e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
111411752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
111511752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
111611752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
111711752d88SAlan Cox 			    "\n              ", flind);
111811752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
111911752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
112011752d88SAlan Cox 			db_printf("\n--            ");
112111752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
112211752d88SAlan Cox 				db_printf("-- --      ");
112311752d88SAlan Cox 			db_printf("--\n");
112411752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
112511752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
112611752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
112711752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
11287e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
112911752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
113011752d88SAlan Cox 				}
113111752d88SAlan Cox 				db_printf("\n");
113211752d88SAlan Cox 			}
113311752d88SAlan Cox 			db_printf("\n");
113411752d88SAlan Cox 		}
11357e226537SAttilio Rao 		db_printf("\n");
11367e226537SAttilio Rao 	}
113711752d88SAlan Cox }
113811752d88SAlan Cox #endif
1139