xref: /freebsd/sys/vm/vm_phys.c (revision 32e77bcdec5c034a9252876aa018f0bf34b36dbc)
111752d88SAlan Cox /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni  *
411752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
511752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
611752d88SAlan Cox  * All rights reserved.
711752d88SAlan Cox  *
811752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
911752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
1011752d88SAlan Cox  *
1111752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1211752d88SAlan Cox  * modification, are permitted provided that the following conditions
1311752d88SAlan Cox  * are met:
1411752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1611752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1711752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1811752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1911752d88SAlan Cox  *
2011752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2111752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2211752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2311752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2411752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2511752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2611752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2711752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2811752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2911752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
3011752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3111752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3211752d88SAlan Cox  */
3311752d88SAlan Cox 
34fbd80bd0SAlan Cox /*
35fbd80bd0SAlan Cox  *	Physical memory system implementation
36fbd80bd0SAlan Cox  *
37fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
38fbd80bd0SAlan Cox  * virtual memory system.
39fbd80bd0SAlan Cox  */
40fbd80bd0SAlan Cox 
4111752d88SAlan Cox #include <sys/cdefs.h>
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
47662e7fa8SMark Johnston #include <sys/domainset.h>
4811752d88SAlan Cox #include <sys/lock.h>
4911752d88SAlan Cox #include <sys/kernel.h>
50b16b4c22SMark Johnston #include <sys/kthread.h>
5111752d88SAlan Cox #include <sys/malloc.h>
5211752d88SAlan Cox #include <sys/mutex.h>
537e226537SAttilio Rao #include <sys/proc.h>
5411752d88SAlan Cox #include <sys/queue.h>
5538d6b2dcSRoger Pau Monné #include <sys/rwlock.h>
5611752d88SAlan Cox #include <sys/sbuf.h>
57b16b4c22SMark Johnston #include <sys/sched.h>
5811752d88SAlan Cox #include <sys/sysctl.h>
5938d6b2dcSRoger Pau Monné #include <sys/tree.h>
60b16b4c22SMark Johnston #include <sys/tslog.h>
61b16b4c22SMark Johnston #include <sys/unistd.h>
6211752d88SAlan Cox #include <sys/vmmeter.h>
6311752d88SAlan Cox 
6411752d88SAlan Cox #include <ddb/ddb.h>
6511752d88SAlan Cox 
6611752d88SAlan Cox #include <vm/vm.h>
6701e115abSDoug Moore #include <vm/vm_extern.h>
6811752d88SAlan Cox #include <vm/vm_param.h>
6911752d88SAlan Cox #include <vm/vm_kern.h>
7011752d88SAlan Cox #include <vm/vm_object.h>
7111752d88SAlan Cox #include <vm/vm_page.h>
7211752d88SAlan Cox #include <vm/vm_phys.h>
73e2068d0bSJeff Roberson #include <vm/vm_pagequeue.h>
7411752d88SAlan Cox 
75449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
76449c2e92SKonstantin Belousov     "Too many physsegs.");
77c9b06fa5SDoug Moore _Static_assert(sizeof(long long) >= sizeof(vm_paddr_t),
78c9b06fa5SDoug Moore     "vm_paddr_t too big for ffsll, flsll.");
7911752d88SAlan Cox 
80b6715dabSJeff Roberson #ifdef NUMA
81cdfeced8SJeff Roberson struct mem_affinity __read_mostly *mem_affinity;
82cdfeced8SJeff Roberson int __read_mostly *mem_locality;
83c415cfc8SZhenlei Huang 
84c415cfc8SZhenlei Huang static int numa_disabled;
85c415cfc8SZhenlei Huang static SYSCTL_NODE(_vm, OID_AUTO, numa, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
86c415cfc8SZhenlei Huang     "NUMA options");
87c415cfc8SZhenlei Huang SYSCTL_INT(_vm_numa, OID_AUTO, disabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
88c415cfc8SZhenlei Huang     &numa_disabled, 0, "NUMA-awareness in the allocators is disabled");
8962d70a81SJohn Baldwin #endif
90a3870a18SJohn Baldwin 
91cdfeced8SJeff Roberson int __read_mostly vm_ndomains = 1;
92463406acSMark Johnston domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);
937e226537SAttilio Rao 
94cdfeced8SJeff Roberson struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
95cdfeced8SJeff Roberson int __read_mostly vm_phys_nsegs;
9681302f1dSMark Johnston static struct vm_phys_seg vm_phys_early_segs[8];
9781302f1dSMark Johnston static int vm_phys_early_nsegs;
9811752d88SAlan Cox 
9938d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg;
10038d6b2dcSRoger Pau Monné static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
10138d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *);
10238d6b2dcSRoger Pau Monné 
10338d6b2dcSRoger Pau Monné RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
104b649c2acSDoug Moore     RB_INITIALIZER(&vm_phys_fictitious_tree);
10538d6b2dcSRoger Pau Monné 
10638d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg {
10738d6b2dcSRoger Pau Monné 	RB_ENTRY(vm_phys_fictitious_seg) node;
10838d6b2dcSRoger Pau Monné 	/* Memory region data */
109b6de32bdSKonstantin Belousov 	vm_paddr_t	start;
110b6de32bdSKonstantin Belousov 	vm_paddr_t	end;
111b6de32bdSKonstantin Belousov 	vm_page_t	first_page;
11238d6b2dcSRoger Pau Monné };
11338d6b2dcSRoger Pau Monné 
11438d6b2dcSRoger Pau Monné RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
11538d6b2dcSRoger Pau Monné     vm_phys_fictitious_cmp);
11638d6b2dcSRoger Pau Monné 
117cdfeced8SJeff Roberson static struct rwlock_padalign vm_phys_fictitious_reg_lock;
118c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
119b6de32bdSKonstantin Belousov 
120cdfeced8SJeff Roberson static struct vm_freelist __aligned(CACHE_LINE_SIZE)
121f2a496d6SKonstantin Belousov     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
122f2a496d6SKonstantin Belousov     [VM_NFREEORDER_MAX];
12311752d88SAlan Cox 
124cdfeced8SJeff Roberson static int __read_mostly vm_nfreelists;
125d866a563SAlan Cox 
126d866a563SAlan Cox /*
12721943937SJeff Roberson  * These "avail lists" are globals used to communicate boot-time physical
12821943937SJeff Roberson  * memory layout to other parts of the kernel.  Each physically contiguous
12921943937SJeff Roberson  * region of memory is defined by a start address at an even index and an
13021943937SJeff Roberson  * end address at the following odd index.  Each list is terminated by a
13121943937SJeff Roberson  * pair of zero entries.
13221943937SJeff Roberson  *
13321943937SJeff Roberson  * dump_avail tells the dump code what regions to include in a crash dump, and
13421943937SJeff Roberson  * phys_avail is all of the remaining physical memory that is available for
13521943937SJeff Roberson  * the vm system.
13621943937SJeff Roberson  *
13721943937SJeff Roberson  * Initially dump_avail and phys_avail are identical.  Boot time memory
13821943937SJeff Roberson  * allocations remove extents from phys_avail that may still be included
13921943937SJeff Roberson  * in dumps.
14021943937SJeff Roberson  */
14121943937SJeff Roberson vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
14221943937SJeff Roberson vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];
14321943937SJeff Roberson 
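/*
 * Editor's illustrative sketch, not part of the allocator: the helper below
 * (its name is hypothetical) shows how the start/end pair convention
 * described above is typically walked.  Each region is
 * [phys_avail[i], phys_avail[i + 1]), and a pair of zero entries ends the
 * list.
 */
static u_long __unused
vm_phys_avail_pages_example(void)
{
	u_long npages;
	int i;

	npages = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		npages += atop(phys_avail[i + 1] - phys_avail[i]);
	return (npages);
}
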
14421943937SJeff Roberson /*
145d866a563SAlan Cox  * Provides the mapping from VM_FREELIST_* to free list indices (flind).
146d866a563SAlan Cox  */
147cdfeced8SJeff Roberson static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
148b16b4c22SMark Johnston static int __read_mostly vm_default_freepool;
149d866a563SAlan Cox 
150d866a563SAlan Cox CTASSERT(VM_FREELIST_DEFAULT == 0);
151d866a563SAlan Cox 
152d866a563SAlan Cox #ifdef VM_FREELIST_DMA32
153d866a563SAlan Cox #define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
154d866a563SAlan Cox #endif
155d866a563SAlan Cox 
156d866a563SAlan Cox /*
157d866a563SAlan Cox  * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
158d866a563SAlan Cox  * the ordering of the free list boundaries.
159d866a563SAlan Cox  */
160d866a563SAlan Cox #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
161d866a563SAlan Cox CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
162d866a563SAlan Cox #endif
16311752d88SAlan Cox 
16411752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
1657029da5cSPawel Biernacki SYSCTL_OID(_vm, OID_AUTO, phys_free,
166114484b7SMark Johnston     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1677029da5cSPawel Biernacki     sysctl_vm_phys_free, "A",
1687029da5cSPawel Biernacki     "Phys Free Info");
16911752d88SAlan Cox 
17011752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
1717029da5cSPawel Biernacki SYSCTL_OID(_vm, OID_AUTO, phys_segs,
172114484b7SMark Johnston     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1737029da5cSPawel Biernacki     sysctl_vm_phys_segs, "A",
1747029da5cSPawel Biernacki     "Phys Seg Info");
17511752d88SAlan Cox 
176b6715dabSJeff Roberson #ifdef NUMA
177415d7ccaSAdrian Chadd static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
1787029da5cSPawel Biernacki SYSCTL_OID(_vm, OID_AUTO, phys_locality,
179114484b7SMark Johnston     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1807029da5cSPawel Biernacki     sysctl_vm_phys_locality, "A",
1817029da5cSPawel Biernacki     "Phys Locality Info");
1826520495aSAdrian Chadd #endif
183415d7ccaSAdrian Chadd 
1847e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
1857e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
186a3870a18SJohn Baldwin 
187d866a563SAlan Cox static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
188d866a563SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
18911752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
1900078df5fSDoug Moore     int order, int pool, int tail);
191c606ab59SDoug Moore 
192b16b4c22SMark Johnston static bool __diagused
193b16b4c22SMark Johnston vm_phys_pool_valid(int pool)
194b16b4c22SMark Johnston {
195b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
196b16b4c22SMark Johnston 	if (pool == VM_FREEPOOL_LAZYINIT)
197b16b4c22SMark Johnston 		return (false);
198b16b4c22SMark Johnston #endif
199b16b4c22SMark Johnston 	return (pool >= 0 && pool < VM_NFREEPOOL);
200b16b4c22SMark Johnston }
201b16b4c22SMark Johnston 
20238d6b2dcSRoger Pau Monné /*
20338d6b2dcSRoger Pau Monné  * Red-black tree helpers for vm fictitious range management.
20438d6b2dcSRoger Pau Monné  */
20538d6b2dcSRoger Pau Monné static inline int
20638d6b2dcSRoger Pau Monné vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
20738d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *range)
20838d6b2dcSRoger Pau Monné {
20938d6b2dcSRoger Pau Monné 
21038d6b2dcSRoger Pau Monné 	KASSERT(range->start != 0 && range->end != 0,
21138d6b2dcSRoger Pau Monné 	    ("Invalid range passed on search for vm_fictitious page"));
21238d6b2dcSRoger Pau Monné 	if (p->start >= range->end)
21338d6b2dcSRoger Pau Monné 		return (1);
21438d6b2dcSRoger Pau Monné 	if (p->start < range->start)
21538d6b2dcSRoger Pau Monné 		return (-1);
21638d6b2dcSRoger Pau Monné 
21738d6b2dcSRoger Pau Monné 	return (0);
21838d6b2dcSRoger Pau Monné }
21938d6b2dcSRoger Pau Monné 
22038d6b2dcSRoger Pau Monné static int
22138d6b2dcSRoger Pau Monné vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
22238d6b2dcSRoger Pau Monné     struct vm_phys_fictitious_seg *p2)
22338d6b2dcSRoger Pau Monné {
22438d6b2dcSRoger Pau Monné 
22538d6b2dcSRoger Pau Monné 	/* Check if this is a search for a page */
22638d6b2dcSRoger Pau Monné 	if (p1->end == 0)
22738d6b2dcSRoger Pau Monné 		return (vm_phys_fictitious_in_range(p1, p2));
22838d6b2dcSRoger Pau Monné 
22938d6b2dcSRoger Pau Monné 	KASSERT(p2->end != 0,
23038d6b2dcSRoger Pau Monné     ("Invalid range passed as second parameter to vm fictitious comparison"));
23138d6b2dcSRoger Pau Monné 
23238d6b2dcSRoger Pau Monné 	/* Searching to add a new range */
23338d6b2dcSRoger Pau Monné 	if (p1->end <= p2->start)
23438d6b2dcSRoger Pau Monné 		return (-1);
23538d6b2dcSRoger Pau Monné 	if (p1->start >= p2->end)
23638d6b2dcSRoger Pau Monné 		return (1);
23738d6b2dcSRoger Pau Monné 
23838d6b2dcSRoger Pau Monné 	panic("Trying to add overlapping vm fictitious ranges:\n"
23938d6b2dcSRoger Pau Monné 	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
24038d6b2dcSRoger Pau Monné 	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
24138d6b2dcSRoger Pau Monné }
24238d6b2dcSRoger Pau Monné 
2436f4acaf4SJeff Roberson int
244cb20a74cSStephen J. Kiernan vm_phys_domain_match(int prefer __numa_used, vm_paddr_t low __numa_used,
245cb20a74cSStephen J. Kiernan     vm_paddr_t high __numa_used)
246449c2e92SKonstantin Belousov {
247b6715dabSJeff Roberson #ifdef NUMA
2486f4acaf4SJeff Roberson 	domainset_t mask;
2496f4acaf4SJeff Roberson 	int i;
250449c2e92SKonstantin Belousov 
2516f4acaf4SJeff Roberson 	if (vm_ndomains == 1 || mem_affinity == NULL)
2526f4acaf4SJeff Roberson 		return (0);
2536f4acaf4SJeff Roberson 
2546f4acaf4SJeff Roberson 	DOMAINSET_ZERO(&mask);
2556f4acaf4SJeff Roberson 	/*
2566f4acaf4SJeff Roberson 	 * Check for any memory that overlaps low, high.
2576f4acaf4SJeff Roberson 	 */
2586f4acaf4SJeff Roberson 	for (i = 0; mem_affinity[i].end != 0; i++)
2596f4acaf4SJeff Roberson 		if (mem_affinity[i].start <= high &&
2606f4acaf4SJeff Roberson 		    mem_affinity[i].end >= low)
2616f4acaf4SJeff Roberson 			DOMAINSET_SET(mem_affinity[i].domain, &mask);
2626f4acaf4SJeff Roberson 	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
2636f4acaf4SJeff Roberson 		return (prefer);
2646f4acaf4SJeff Roberson 	if (DOMAINSET_EMPTY(&mask))
2656f4acaf4SJeff Roberson 		panic("vm_phys_domain_match:  Impossible constraint");
2666f4acaf4SJeff Roberson 	return (DOMAINSET_FFS(&mask) - 1);
2676f4acaf4SJeff Roberson #else
2686f4acaf4SJeff Roberson 	return (0);
2696f4acaf4SJeff Roberson #endif
270449c2e92SKonstantin Belousov }
271449c2e92SKonstantin Belousov 
27211752d88SAlan Cox /*
27311752d88SAlan Cox  * Outputs the state of the physical memory allocator, specifically,
27411752d88SAlan Cox  * the amount of physical memory in each free list.
27511752d88SAlan Cox  */
27611752d88SAlan Cox static int
27711752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
27811752d88SAlan Cox {
27911752d88SAlan Cox 	struct sbuf sbuf;
28011752d88SAlan Cox 	struct vm_freelist *fl;
2817e226537SAttilio Rao 	int dom, error, flind, oind, pind;
28211752d88SAlan Cox 
28300f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
28400f0e671SMatthew D Fleming 	if (error != 0)
28500f0e671SMatthew D Fleming 		return (error);
2867e226537SAttilio Rao 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
2877e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
288eb2f42fbSAlan Cox 		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
28911752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
290eb2f42fbSAlan Cox 			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
29111752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
29211752d88SAlan Cox 			    "\n              ", flind);
29311752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
29411752d88SAlan Cox 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
29511752d88SAlan Cox 			sbuf_printf(&sbuf, "\n--            ");
29611752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
29711752d88SAlan Cox 				sbuf_printf(&sbuf, "-- --      ");
29811752d88SAlan Cox 			sbuf_printf(&sbuf, "--\n");
29911752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
300d689bc00SAlan Cox 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
30111752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
30211752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
3037e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
304eb2f42fbSAlan Cox 					sbuf_printf(&sbuf, "  |  %6d",
3057e226537SAttilio Rao 					    fl[oind].lcnt);
30611752d88SAlan Cox 				}
30711752d88SAlan Cox 				sbuf_printf(&sbuf, "\n");
30811752d88SAlan Cox 			}
3097e226537SAttilio Rao 		}
31011752d88SAlan Cox 	}
3114e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
31211752d88SAlan Cox 	sbuf_delete(&sbuf);
31311752d88SAlan Cox 	return (error);
31411752d88SAlan Cox }
31511752d88SAlan Cox 
31611752d88SAlan Cox /*
31711752d88SAlan Cox  * Outputs the set of physical memory segments.
31811752d88SAlan Cox  */
31911752d88SAlan Cox static int
32011752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
32111752d88SAlan Cox {
32211752d88SAlan Cox 	struct sbuf sbuf;
32311752d88SAlan Cox 	struct vm_phys_seg *seg;
32411752d88SAlan Cox 	int error, segind;
32511752d88SAlan Cox 
32600f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
32700f0e671SMatthew D Fleming 	if (error != 0)
32800f0e671SMatthew D Fleming 		return (error);
3294e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
33011752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
33111752d88SAlan Cox 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
33211752d88SAlan Cox 		seg = &vm_phys_segs[segind];
33311752d88SAlan Cox 		sbuf_printf(&sbuf, "start:     %#jx\n",
33411752d88SAlan Cox 		    (uintmax_t)seg->start);
33511752d88SAlan Cox 		sbuf_printf(&sbuf, "end:       %#jx\n",
33611752d88SAlan Cox 		    (uintmax_t)seg->end);
337a3870a18SJohn Baldwin 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
33811752d88SAlan Cox 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
33911752d88SAlan Cox 	}
3404e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
34111752d88SAlan Cox 	sbuf_delete(&sbuf);
34211752d88SAlan Cox 	return (error);
34311752d88SAlan Cox }
34411752d88SAlan Cox 
345415d7ccaSAdrian Chadd /*
346415d7ccaSAdrian Chadd  * Return affinity, or -1 if there's no affinity information.
347415d7ccaSAdrian Chadd  */
3486520495aSAdrian Chadd int
349cb20a74cSStephen J. Kiernan vm_phys_mem_affinity(int f __numa_used, int t __numa_used)
350415d7ccaSAdrian Chadd {
351415d7ccaSAdrian Chadd 
352b6715dabSJeff Roberson #ifdef NUMA
353415d7ccaSAdrian Chadd 	if (mem_locality == NULL)
354415d7ccaSAdrian Chadd 		return (-1);
355415d7ccaSAdrian Chadd 	if (f >= vm_ndomains || t >= vm_ndomains)
356415d7ccaSAdrian Chadd 		return (-1);
357415d7ccaSAdrian Chadd 	return (mem_locality[f * vm_ndomains + t]);
3586520495aSAdrian Chadd #else
3596520495aSAdrian Chadd 	return (-1);
3606520495aSAdrian Chadd #endif
361415d7ccaSAdrian Chadd }
362415d7ccaSAdrian Chadd 
363b6715dabSJeff Roberson #ifdef NUMA
364415d7ccaSAdrian Chadd /*
365415d7ccaSAdrian Chadd  * Outputs the VM locality table.
366415d7ccaSAdrian Chadd  */
367415d7ccaSAdrian Chadd static int
368415d7ccaSAdrian Chadd sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
369415d7ccaSAdrian Chadd {
370415d7ccaSAdrian Chadd 	struct sbuf sbuf;
371415d7ccaSAdrian Chadd 	int error, i, j;
372415d7ccaSAdrian Chadd 
373415d7ccaSAdrian Chadd 	error = sysctl_wire_old_buffer(req, 0);
374415d7ccaSAdrian Chadd 	if (error != 0)
375415d7ccaSAdrian Chadd 		return (error);
376415d7ccaSAdrian Chadd 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
377415d7ccaSAdrian Chadd 
378415d7ccaSAdrian Chadd 	sbuf_printf(&sbuf, "\n");
379415d7ccaSAdrian Chadd 
380415d7ccaSAdrian Chadd 	for (i = 0; i < vm_ndomains; i++) {
381415d7ccaSAdrian Chadd 		sbuf_printf(&sbuf, "%d: ", i);
382415d7ccaSAdrian Chadd 		for (j = 0; j < vm_ndomains; j++) {
383415d7ccaSAdrian Chadd 			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
384415d7ccaSAdrian Chadd 		}
385415d7ccaSAdrian Chadd 		sbuf_printf(&sbuf, "\n");
386415d7ccaSAdrian Chadd 	}
387415d7ccaSAdrian Chadd 	error = sbuf_finish(&sbuf);
388415d7ccaSAdrian Chadd 	sbuf_delete(&sbuf);
389415d7ccaSAdrian Chadd 	return (error);
390415d7ccaSAdrian Chadd }
3916520495aSAdrian Chadd #endif
392415d7ccaSAdrian Chadd 
3937e226537SAttilio Rao static void
3940078df5fSDoug Moore vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
3950078df5fSDoug Moore     int tail)
396a3870a18SJohn Baldwin {
397a3870a18SJohn Baldwin 
3987e226537SAttilio Rao 	m->order = order;
3990078df5fSDoug Moore 	m->pool = pool;
4007e226537SAttilio Rao 	if (tail)
4015cd29d0fSMark Johnston 		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
4027e226537SAttilio Rao 	else
4035cd29d0fSMark Johnston 		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
4047e226537SAttilio Rao 	fl[order].lcnt++;
405a3870a18SJohn Baldwin }
4067e226537SAttilio Rao 
4077e226537SAttilio Rao static void
4087e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
4097e226537SAttilio Rao {
4107e226537SAttilio Rao 
4115cd29d0fSMark Johnston 	TAILQ_REMOVE(&fl[order].pl, m, listq);
4127e226537SAttilio Rao 	fl[order].lcnt--;
4137e226537SAttilio Rao 	m->order = VM_NFREEORDER;
414a3870a18SJohn Baldwin }
415a3870a18SJohn Baldwin 
41611752d88SAlan Cox /*
41711752d88SAlan Cox  * Create a physical memory segment.
41811752d88SAlan Cox  */
41911752d88SAlan Cox static void
420d866a563SAlan Cox _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
42111752d88SAlan Cox {
42211752d88SAlan Cox 	struct vm_phys_seg *seg;
42311752d88SAlan Cox 
4248a14ddccSOlivier Certner 	if (!(0 <= domain && domain < vm_ndomains))
4258a14ddccSOlivier Certner 		panic("%s: Invalid domain %d ('vm_ndomains' is %d)",
4268a14ddccSOlivier Certner 		    __func__, domain, vm_ndomains);
4278a14ddccSOlivier Certner 	if (vm_phys_nsegs >= VM_PHYSSEG_MAX)
4288a14ddccSOlivier Certner 		panic("Not enough storage for physical segments, "
4298a14ddccSOlivier Certner 		    "increase VM_PHYSSEG_MAX");
4308a14ddccSOlivier Certner 
43111752d88SAlan Cox 	seg = &vm_phys_segs[vm_phys_nsegs++];
4328a14ddccSOlivier Certner 	while (seg > vm_phys_segs && seg[-1].start >= end) {
433271f0f12SAlan Cox 		*seg = *(seg - 1);
434271f0f12SAlan Cox 		seg--;
435271f0f12SAlan Cox 	}
43611752d88SAlan Cox 	seg->start = start;
43711752d88SAlan Cox 	seg->end = end;
438a3870a18SJohn Baldwin 	seg->domain = domain;
4398a14ddccSOlivier Certner 	if (seg != vm_phys_segs && seg[-1].end > start)
4408a14ddccSOlivier Certner 		panic("Overlapping physical segments: Current [%#jx,%#jx) "
4418a14ddccSOlivier Certner 		    "at index %zu, previous [%#jx,%#jx)",
4428a14ddccSOlivier Certner 		    (uintmax_t)start, (uintmax_t)end, seg - vm_phys_segs,
4438a14ddccSOlivier Certner 		    (uintmax_t)seg[-1].start, (uintmax_t)seg[-1].end);
44411752d88SAlan Cox }
44511752d88SAlan Cox 
446a3870a18SJohn Baldwin static void
447d866a563SAlan Cox vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
448a3870a18SJohn Baldwin {
449b6715dabSJeff Roberson #ifdef NUMA
450a3870a18SJohn Baldwin 	int i;
451a3870a18SJohn Baldwin 
452a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
453d866a563SAlan Cox 		_vm_phys_create_seg(start, end, 0);
454a3870a18SJohn Baldwin 		return;
455a3870a18SJohn Baldwin 	}
456a3870a18SJohn Baldwin 
457a3870a18SJohn Baldwin 	for (i = 0;; i++) {
458a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
459a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
460a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
461a3870a18SJohn Baldwin 			continue;
462a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
463a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
464a3870a18SJohn Baldwin 			    (uintmax_t)start);
465a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
466d866a563SAlan Cox 			_vm_phys_create_seg(start, end,
467a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
468a3870a18SJohn Baldwin 			break;
469a3870a18SJohn Baldwin 		}
470d866a563SAlan Cox 		_vm_phys_create_seg(start, mem_affinity[i].end,
471a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
472a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
473a3870a18SJohn Baldwin 	}
47462d70a81SJohn Baldwin #else
47562d70a81SJohn Baldwin 	_vm_phys_create_seg(start, end, 0);
47662d70a81SJohn Baldwin #endif
477a3870a18SJohn Baldwin }
478a3870a18SJohn Baldwin 
47911752d88SAlan Cox /*
480271f0f12SAlan Cox  * Add a physical memory segment.
481271f0f12SAlan Cox  */
482271f0f12SAlan Cox void
483271f0f12SAlan Cox vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
484271f0f12SAlan Cox {
485d866a563SAlan Cox 	vm_paddr_t paddr;
486271f0f12SAlan Cox 
487f30309abSOlivier Certner 	if ((start & PAGE_MASK) != 0)
488f30309abSOlivier Certner 		panic("%s: start (%jx) is not page aligned", __func__,
489f30309abSOlivier Certner 		    (uintmax_t)start);
490f30309abSOlivier Certner 	if ((end & PAGE_MASK) != 0)
491f30309abSOlivier Certner 		panic("%s: end (%jx) is not page aligned", __func__,
492f30309abSOlivier Certner 		    (uintmax_t)end);
493f30309abSOlivier Certner 	if (start > end)
494f30309abSOlivier Certner 		panic("%s: start (%jx) > end (%jx)!", __func__,
495f30309abSOlivier Certner 		    (uintmax_t)start, (uintmax_t)end);
496f30309abSOlivier Certner 
497f30309abSOlivier Certner 	if (start == end)
498f30309abSOlivier Certner 		return;
499d866a563SAlan Cox 
500d866a563SAlan Cox 	/*
501d866a563SAlan Cox 	 * Split the physical memory segment if it spans two or more free
502d866a563SAlan Cox 	 * list boundaries.
503d866a563SAlan Cox 	 */
504d866a563SAlan Cox 	paddr = start;
505d866a563SAlan Cox #ifdef	VM_FREELIST_LOWMEM
506d866a563SAlan Cox 	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
507d866a563SAlan Cox 		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
508d866a563SAlan Cox 		paddr = VM_LOWMEM_BOUNDARY;
509d866a563SAlan Cox 	}
510271f0f12SAlan Cox #endif
511d866a563SAlan Cox #ifdef	VM_FREELIST_DMA32
512d866a563SAlan Cox 	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
513d866a563SAlan Cox 		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
514d866a563SAlan Cox 		paddr = VM_DMA32_BOUNDARY;
515d866a563SAlan Cox 	}
516d866a563SAlan Cox #endif
517d866a563SAlan Cox 	vm_phys_create_seg(paddr, end);
518271f0f12SAlan Cox }
519271f0f12SAlan Cox 
520271f0f12SAlan Cox /*
52111752d88SAlan Cox  * Initialize the physical memory allocator.
522d866a563SAlan Cox  *
523d866a563SAlan Cox  * Requires that vm_page_array is initialized!
52411752d88SAlan Cox  */
52511752d88SAlan Cox void
52611752d88SAlan Cox vm_phys_init(void)
52711752d88SAlan Cox {
52811752d88SAlan Cox 	struct vm_freelist *fl;
52972aebdd7SAlan Cox 	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
53052526922SJohn Baldwin #if defined(VM_DMA32_NPAGES_THRESHOLD) || defined(VM_PHYSSEG_SPARSE)
531d866a563SAlan Cox 	u_long npages;
53252526922SJohn Baldwin #endif
533d866a563SAlan Cox 	int dom, flind, freelist, oind, pind, segind;
53411752d88SAlan Cox 
535d866a563SAlan Cox 	/*
536d866a563SAlan Cox 	 * Compute the number of free lists, and generate the mapping from the
537d866a563SAlan Cox 	 * manifest constants VM_FREELIST_* to the free list indices.
538d866a563SAlan Cox 	 *
539d866a563SAlan Cox 	 * Initially, the entries of vm_freelist_to_flind[] are set to either
540d866a563SAlan Cox 	 * 0 or 1 to indicate which free lists should be created.
541d866a563SAlan Cox 	 */
54252526922SJohn Baldwin #ifdef	VM_DMA32_NPAGES_THRESHOLD
543d866a563SAlan Cox 	npages = 0;
54452526922SJohn Baldwin #endif
545d866a563SAlan Cox 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
546d866a563SAlan Cox 		seg = &vm_phys_segs[segind];
547d866a563SAlan Cox #ifdef	VM_FREELIST_LOWMEM
548d866a563SAlan Cox 		if (seg->end <= VM_LOWMEM_BOUNDARY)
549d866a563SAlan Cox 			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
550d866a563SAlan Cox 		else
551d866a563SAlan Cox #endif
552d866a563SAlan Cox #ifdef	VM_FREELIST_DMA32
553d866a563SAlan Cox 		if (
554d866a563SAlan Cox #ifdef	VM_DMA32_NPAGES_THRESHOLD
555d866a563SAlan Cox 		    /*
556d866a563SAlan Cox 		     * Create the DMA32 free list only if the amount of
557d866a563SAlan Cox 		     * physical memory above physical address 4G exceeds the
558d866a563SAlan Cox 		     * given threshold.
559d866a563SAlan Cox 		     */
560d866a563SAlan Cox 		    npages > VM_DMA32_NPAGES_THRESHOLD &&
561d866a563SAlan Cox #endif
562d866a563SAlan Cox 		    seg->end <= VM_DMA32_BOUNDARY)
563d866a563SAlan Cox 			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
564d866a563SAlan Cox 		else
565d866a563SAlan Cox #endif
566d866a563SAlan Cox 		{
56752526922SJohn Baldwin #ifdef	VM_DMA32_NPAGES_THRESHOLD
568d866a563SAlan Cox 			npages += atop(seg->end - seg->start);
56952526922SJohn Baldwin #endif
570d866a563SAlan Cox 			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
571d866a563SAlan Cox 		}
572d866a563SAlan Cox 	}
573d866a563SAlan Cox 	/* Change each entry into a running total of the free lists. */
574d866a563SAlan Cox 	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
575d866a563SAlan Cox 		vm_freelist_to_flind[freelist] +=
576d866a563SAlan Cox 		    vm_freelist_to_flind[freelist - 1];
577d866a563SAlan Cox 	}
578d866a563SAlan Cox 	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
579d866a563SAlan Cox 	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
580d866a563SAlan Cox 	/* Change each entry into a free list index. */
581d866a563SAlan Cox 	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
582d866a563SAlan Cox 		vm_freelist_to_flind[freelist]--;
583d866a563SAlan Cox 
584d866a563SAlan Cox 	/*
585d866a563SAlan Cox 	 * Initialize the first_page and free_queues fields of each physical
586d866a563SAlan Cox 	 * memory segment.
587d866a563SAlan Cox 	 */
588271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE
589d866a563SAlan Cox 	npages = 0;
59011752d88SAlan Cox #endif
591271f0f12SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
592271f0f12SAlan Cox 		seg = &vm_phys_segs[segind];
593271f0f12SAlan Cox #ifdef VM_PHYSSEG_SPARSE
594d866a563SAlan Cox 		seg->first_page = &vm_page_array[npages];
595d866a563SAlan Cox 		npages += atop(seg->end - seg->start);
596271f0f12SAlan Cox #else
597271f0f12SAlan Cox 		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
59811752d88SAlan Cox #endif
599d866a563SAlan Cox #ifdef	VM_FREELIST_LOWMEM
600d866a563SAlan Cox 		if (seg->end <= VM_LOWMEM_BOUNDARY) {
601d866a563SAlan Cox 			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
602d866a563SAlan Cox 			KASSERT(flind >= 0,
603d866a563SAlan Cox 			    ("vm_phys_init: LOWMEM flind < 0"));
604d866a563SAlan Cox 		} else
605d866a563SAlan Cox #endif
606d866a563SAlan Cox #ifdef	VM_FREELIST_DMA32
607d866a563SAlan Cox 		if (seg->end <= VM_DMA32_BOUNDARY) {
608d866a563SAlan Cox 			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
609d866a563SAlan Cox 			KASSERT(flind >= 0,
610d866a563SAlan Cox 			    ("vm_phys_init: DMA32 flind < 0"));
611d866a563SAlan Cox 		} else
612d866a563SAlan Cox #endif
613d866a563SAlan Cox 		{
614d866a563SAlan Cox 			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
615d866a563SAlan Cox 			KASSERT(flind >= 0,
616d866a563SAlan Cox 			    ("vm_phys_init: DEFAULT flind < 0"));
61711752d88SAlan Cox 		}
618d866a563SAlan Cox 		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
619d866a563SAlan Cox 	}
620d866a563SAlan Cox 
621d866a563SAlan Cox 	/*
62272aebdd7SAlan Cox 	 * Coalesce physical memory segments that are contiguous and share the
62372aebdd7SAlan Cox 	 * same per-domain free queues.
62472aebdd7SAlan Cox 	 */
62572aebdd7SAlan Cox 	prev_seg = vm_phys_segs;
62672aebdd7SAlan Cox 	seg = &vm_phys_segs[1];
62772aebdd7SAlan Cox 	end_seg = &vm_phys_segs[vm_phys_nsegs];
62872aebdd7SAlan Cox 	while (seg < end_seg) {
62972aebdd7SAlan Cox 		if (prev_seg->end == seg->start &&
63072aebdd7SAlan Cox 		    prev_seg->free_queues == seg->free_queues) {
63172aebdd7SAlan Cox 			prev_seg->end = seg->end;
63272aebdd7SAlan Cox 			KASSERT(prev_seg->domain == seg->domain,
63372aebdd7SAlan Cox 			    ("vm_phys_init: free queues cannot span domains"));
63472aebdd7SAlan Cox 			vm_phys_nsegs--;
63572aebdd7SAlan Cox 			end_seg--;
63672aebdd7SAlan Cox 			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
63772aebdd7SAlan Cox 				*tmp_seg = *(tmp_seg + 1);
63872aebdd7SAlan Cox 		} else {
63972aebdd7SAlan Cox 			prev_seg = seg;
64072aebdd7SAlan Cox 			seg++;
64172aebdd7SAlan Cox 		}
64272aebdd7SAlan Cox 	}
64372aebdd7SAlan Cox 
64472aebdd7SAlan Cox 	/*
645d866a563SAlan Cox 	 * Initialize the free queues.
646d866a563SAlan Cox 	 */
6477e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
64811752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
64911752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
6507e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
65111752d88SAlan Cox 				for (oind = 0; oind < VM_NFREEORDER; oind++)
65211752d88SAlan Cox 					TAILQ_INIT(&fl[oind].pl);
65311752d88SAlan Cox 			}
65411752d88SAlan Cox 		}
655a3870a18SJohn Baldwin 	}
656d866a563SAlan Cox 
657b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
658b16b4c22SMark Johnston 	vm_default_freepool = VM_FREEPOOL_LAZYINIT;
659b16b4c22SMark Johnston #else
660b16b4c22SMark Johnston 	vm_default_freepool = VM_FREEPOOL_DEFAULT;
661b16b4c22SMark Johnston #endif
662b16b4c22SMark Johnston 
66338d6b2dcSRoger Pau Monné 	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
66411752d88SAlan Cox }
66511752d88SAlan Cox 
66611752d88SAlan Cox /*
667662e7fa8SMark Johnston  * Register info about the NUMA topology of the system.
668662e7fa8SMark Johnston  *
669662e7fa8SMark Johnston  * Invoked by platform-dependent code prior to vm_phys_init().
670662e7fa8SMark Johnston  */
671662e7fa8SMark Johnston void
672cb20a74cSStephen J. Kiernan vm_phys_register_domains(int ndomains __numa_used,
673cb20a74cSStephen J. Kiernan     struct mem_affinity *affinity __numa_used, int *locality __numa_used)
674662e7fa8SMark Johnston {
675662e7fa8SMark Johnston #ifdef NUMA
676c415cfc8SZhenlei Huang 	int i;
677662e7fa8SMark Johnston 
678b61f3142SMark Johnston 	/*
679b61f3142SMark Johnston 	 * For now the only override value that we support is 1, which
680b61f3142SMark Johnston 	 * effectively disables NUMA-awareness in the allocators.
681b61f3142SMark Johnston 	 */
682c415cfc8SZhenlei Huang 	TUNABLE_INT_FETCH("vm.numa.disabled", &numa_disabled);
683c415cfc8SZhenlei Huang 	if (numa_disabled)
684b61f3142SMark Johnston 		ndomains = 1;
685b61f3142SMark Johnston 
686b61f3142SMark Johnston 	if (ndomains > 1) {
687662e7fa8SMark Johnston 		vm_ndomains = ndomains;
688662e7fa8SMark Johnston 		mem_affinity = affinity;
689662e7fa8SMark Johnston 		mem_locality = locality;
690b61f3142SMark Johnston 	}
691662e7fa8SMark Johnston 
692662e7fa8SMark Johnston 	for (i = 0; i < vm_ndomains; i++)
693662e7fa8SMark Johnston 		DOMAINSET_SET(i, &all_domains);
694662e7fa8SMark Johnston #endif
695662e7fa8SMark Johnston }
696662e7fa8SMark Johnston 
697662e7fa8SMark Johnston /*
69811752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
699370a338aSAlan Cox  *
700370a338aSAlan Cox  * When this function is called by a page allocation function, the caller
701370a338aSAlan Cox  * should request insertion at the head unless the order [order, oind) queues
702370a338aSAlan Cox  * are known to be empty.  The objective is to reduce the likelihood of
703370a338aSAlan Cox  * long-term fragmentation by promoting contemporaneous allocation and
704370a338aSAlan Cox  * (hopefully) deallocation.
70511752d88SAlan Cox  */
70611752d88SAlan Cox static __inline void
707370a338aSAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
7080078df5fSDoug Moore     int pool, int tail)
70911752d88SAlan Cox {
71011752d88SAlan Cox 	vm_page_t m_buddy;
71111752d88SAlan Cox 
71211752d88SAlan Cox 	while (oind > order) {
71311752d88SAlan Cox 		oind--;
71411752d88SAlan Cox 		m_buddy = &m[1 << oind];
71511752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
71611752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
71711752d88SAlan Cox 		    m_buddy, m_buddy->order));
7180078df5fSDoug Moore 		vm_freelist_add(fl, m_buddy, oind, pool, tail);
71911752d88SAlan Cox         }
72011752d88SAlan Cox }
72111752d88SAlan Cox 
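/*
 * Editor's illustrative sketch, not part of the allocator: when
 * vm_phys_split_pages() above splits a chunk of 1 << oind pages down to
 * order "order", one buddy of each order oind - 1, ..., order is returned
 * to the free lists, leaving 1 << order pages at "m" for the caller.  The
 * hypothetical helper below only records the freed buddy sizes.
 */
static int __unused
vm_phys_split_sizes_example(int oind, int order, int *sizes)
{
	int k, n;

	n = 0;
	for (k = oind - 1; k >= order; k--)
		sizes[n++] = 1 << k;	/* buddy freed at offset 1 << k from m */
	return (n);
}
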
722d7ec4a88SMark Johnston static void
7230078df5fSDoug Moore vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int pool,
7240078df5fSDoug Moore     int tail)
725d7ec4a88SMark Johnston {
726d7ec4a88SMark Johnston 	KASSERT(order >= 0 && order < VM_NFREEORDER,
727d7ec4a88SMark Johnston 	    ("%s: invalid order %d", __func__, order));
728d7ec4a88SMark Johnston 
7290078df5fSDoug Moore 	vm_freelist_add(fl, m, order, pool, tail);
730b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
7310078df5fSDoug Moore 	if (__predict_false(pool == VM_FREEPOOL_LAZYINIT)) {
732b16b4c22SMark Johnston 		vm_page_t m_next;
733517c5854SMark Johnston 		vm_paddr_t pa;
734b16b4c22SMark Johnston 		int npages;
735b16b4c22SMark Johnston 
736b16b4c22SMark Johnston 		npages = 1 << order;
737b16b4c22SMark Johnston 		m_next = m + npages;
738517c5854SMark Johnston 		pa = m->phys_addr + ptoa(npages);
739517c5854SMark Johnston 		if (pa < vm_phys_segs[m->segind].end) {
740517c5854SMark Johnston 			vm_page_init_page(m_next, pa, m->segind,
741b16b4c22SMark Johnston 			    VM_FREEPOOL_LAZYINIT);
742b16b4c22SMark Johnston 		}
743517c5854SMark Johnston 	}
744b16b4c22SMark Johnston #endif
745d7ec4a88SMark Johnston }
746d7ec4a88SMark Johnston 
74711752d88SAlan Cox /*
748e77f4e7fSDoug Moore  * Add the physical pages [m, m + npages) at the beginning of a power-of-two
749e77f4e7fSDoug Moore  * aligned and sized set to the specified free list.
750e77f4e7fSDoug Moore  *
751e77f4e7fSDoug Moore  * When this function is called by a page allocation function, the caller
752e77f4e7fSDoug Moore  * should request insertion at the head unless the lower-order queues are
753e77f4e7fSDoug Moore  * known to be empty.  The objective is to reduce the likelihood of long-
754e77f4e7fSDoug Moore  * term fragmentation by promoting contemporaneous allocation and (hopefully)
755e77f4e7fSDoug Moore  * deallocation.
756e77f4e7fSDoug Moore  *
757e77f4e7fSDoug Moore  * The physical page m's buddy must not be free.
758e77f4e7fSDoug Moore  */
759e77f4e7fSDoug Moore static void
7600078df5fSDoug Moore vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
7610078df5fSDoug Moore     int tail)
762e77f4e7fSDoug Moore {
763e77f4e7fSDoug Moore         int order;
764e77f4e7fSDoug Moore 
765e77f4e7fSDoug Moore 	KASSERT(npages == 0 ||
766e77f4e7fSDoug Moore 	    (VM_PAGE_TO_PHYS(m) &
767543d55d7SDoug Moore 	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
768e77f4e7fSDoug Moore 	    ("%s: page %p and npages %u are misaligned",
769e77f4e7fSDoug Moore 	    __func__, m, npages));
770e77f4e7fSDoug Moore         while (npages > 0) {
771e77f4e7fSDoug Moore 		KASSERT(m->order == VM_NFREEORDER,
772e77f4e7fSDoug Moore 		    ("%s: page %p has unexpected order %d",
773e77f4e7fSDoug Moore 		    __func__, m, m->order));
774543d55d7SDoug Moore 		order = ilog2(npages);
775e77f4e7fSDoug Moore 		KASSERT(order < VM_NFREEORDER,
776e77f4e7fSDoug Moore 		    ("%s: order %d is out of range", __func__, order));
7770078df5fSDoug Moore 		vm_phys_enq_chunk(fl, m, order, pool, tail);
778e77f4e7fSDoug Moore 		m += 1 << order;
779e77f4e7fSDoug Moore 		npages -= 1 << order;
780e77f4e7fSDoug Moore 	}
781e77f4e7fSDoug Moore }
782e77f4e7fSDoug Moore 
783e77f4e7fSDoug Moore /*
7847493904eSAlan Cox  * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
7857493904eSAlan Cox  * and sized set to the specified free list.
7867493904eSAlan Cox  *
7877493904eSAlan Cox  * When this function is called by a page allocation function, the caller
7887493904eSAlan Cox  * should request insertion at the head unless the lower-order queues are
7897493904eSAlan Cox  * known to be empty.  The objective is to reduce the likelihood of long-
7907493904eSAlan Cox  * term fragmentation by promoting contemporaneous allocation and (hopefully)
7917493904eSAlan Cox  * deallocation.
7927493904eSAlan Cox  *
793ccdb2827SDoug Moore  * If npages is zero, this function does nothing and ignores the physical page
794ccdb2827SDoug Moore  * parameter m.  Otherwise, the physical page m's buddy must not be free.
7957493904eSAlan Cox  */
796c9b06fa5SDoug Moore static vm_page_t
7970078df5fSDoug Moore vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
7980078df5fSDoug Moore     int tail)
7997493904eSAlan Cox {
8007493904eSAlan Cox 	int order;
8017493904eSAlan Cox 
802ccdb2827SDoug Moore 	KASSERT(npages == 0 ||
803ccdb2827SDoug Moore 	    ((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
804543d55d7SDoug Moore 	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
8057493904eSAlan Cox 	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
8067493904eSAlan Cox 	    m, npages));
807c9b06fa5SDoug Moore 	while (npages > 0) {
8087493904eSAlan Cox 		KASSERT(m->order == VM_NFREEORDER,
8097493904eSAlan Cox 		    ("vm_phys_enq_range: page %p has unexpected order %d",
8107493904eSAlan Cox 		    m, m->order));
8117493904eSAlan Cox 		order = ffs(npages) - 1;
8120078df5fSDoug Moore 		vm_phys_enq_chunk(fl, m, order, pool, tail);
813c9b06fa5SDoug Moore 		m += 1 << order;
814c9b06fa5SDoug Moore 		npages -= 1 << order;
815c9b06fa5SDoug Moore 	}
816c9b06fa5SDoug Moore 	return (m);
8177493904eSAlan Cox }
8187493904eSAlan Cox 
8197493904eSAlan Cox /*
8200078df5fSDoug Moore  * Complete initialization of a contiguous, power of two-sized set of physical
8210078df5fSDoug Moore  * pages.
822b16b4c22SMark Johnston  *
823b16b4c22SMark Johnston  * If the pages currently belong to the lazy init pool, then the corresponding
824b16b4c22SMark Johnston  * page structures must be initialized.  In this case it is assumed that the
825b16b4c22SMark Johnston  * first page in the run has already been initialized.
826e3537f92SDoug Moore  */
827e3537f92SDoug Moore static void
8280078df5fSDoug Moore vm_phys_finish_init(vm_page_t m, int order)
829e3537f92SDoug Moore {
830b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
831b16b4c22SMark Johnston 	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
832b16b4c22SMark Johnston 		vm_paddr_t pa;
833b16b4c22SMark Johnston 		int segind;
834e3537f92SDoug Moore 
835b16b4c22SMark Johnston 		TSENTER();
836b16b4c22SMark Johnston 		pa = m->phys_addr + PAGE_SIZE;
837b16b4c22SMark Johnston 		segind = m->segind;
838b16b4c22SMark Johnston 		for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
839b16b4c22SMark Johnston 		    m_tmp++, pa += PAGE_SIZE)
8400078df5fSDoug Moore 			vm_page_init_page(m_tmp, pa, segind, VM_NFREEPOOL);
841b16b4c22SMark Johnston 		TSEXIT();
8420078df5fSDoug Moore 	}
843b16b4c22SMark Johnston #endif
844e3537f92SDoug Moore }
845e3537f92SDoug Moore 
846e3537f92SDoug Moore /*
84789ea39a7SAlan Cox  * Tries to allocate the specified number of pages from the specified pool
84889ea39a7SAlan Cox  * within the specified domain.  Returns the actual number of allocated pages
84989ea39a7SAlan Cox  * and a pointer to each page through the array ma[].
85089ea39a7SAlan Cox  *
85132d81f21SAlan Cox  * The returned pages may not be physically contiguous.  However, in contrast
85232d81f21SAlan Cox  * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
85332d81f21SAlan Cox  * calling this function once to allocate the desired number of pages will
8540078df5fSDoug Moore  * avoid wasted time in vm_phys_split_pages().  The allocated pages have no
8550078df5fSDoug Moore  * valid pool field set.
85689ea39a7SAlan Cox  *
85789ea39a7SAlan Cox  * The free page queues for the specified domain must be locked.
85889ea39a7SAlan Cox  */
85989ea39a7SAlan Cox int
86089ea39a7SAlan Cox vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
86189ea39a7SAlan Cox {
86289ea39a7SAlan Cox 	struct vm_freelist *alt, *fl;
86389ea39a7SAlan Cox 	vm_page_t m;
864c9b06fa5SDoug Moore 	int avail, end, flind, freelist, i, oind, pind;
86589ea39a7SAlan Cox 
86689ea39a7SAlan Cox 	KASSERT(domain >= 0 && domain < vm_ndomains,
86789ea39a7SAlan Cox 	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
868b16b4c22SMark Johnston 	KASSERT(vm_phys_pool_valid(pool),
86989ea39a7SAlan Cox 	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
87089ea39a7SAlan Cox 	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
87189ea39a7SAlan Cox 	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
87289ea39a7SAlan Cox 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
87389ea39a7SAlan Cox 	i = 0;
87489ea39a7SAlan Cox 	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
87589ea39a7SAlan Cox 		flind = vm_freelist_to_flind[freelist];
87689ea39a7SAlan Cox 		if (flind < 0)
87789ea39a7SAlan Cox 			continue;
87889ea39a7SAlan Cox 		fl = vm_phys_free_queues[domain][flind][pool];
87989ea39a7SAlan Cox 		for (oind = 0; oind < VM_NFREEORDER; oind++) {
88089ea39a7SAlan Cox 			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
88189ea39a7SAlan Cox 				vm_freelist_rem(fl, m, oind);
882c9b06fa5SDoug Moore 				avail = i + (1 << oind);
883c9b06fa5SDoug Moore 				end = imin(npages, avail);
884e3537f92SDoug Moore 				while (i < end)
88589ea39a7SAlan Cox 					ma[i++] = m++;
886c9b06fa5SDoug Moore 				if (i == npages) {
8877493904eSAlan Cox 					/*
888c9b06fa5SDoug Moore 					 * Return excess pages to fl.  Its order
889c9b06fa5SDoug Moore 					 * [0, oind) queues are empty.
8907493904eSAlan Cox 					 */
8910078df5fSDoug Moore 					vm_phys_enq_range(m, avail - i, fl,
8920078df5fSDoug Moore 					    pool, 1);
89389ea39a7SAlan Cox 					return (npages);
894c9b06fa5SDoug Moore 				}
89589ea39a7SAlan Cox 			}
89689ea39a7SAlan Cox 		}
89789ea39a7SAlan Cox 		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
898b16b4c22SMark Johnston 			for (pind = vm_default_freepool; pind < VM_NFREEPOOL;
899b16b4c22SMark Johnston 			    pind++) {
90089ea39a7SAlan Cox 				alt = vm_phys_free_queues[domain][flind][pind];
90189ea39a7SAlan Cox 				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
90289ea39a7SAlan Cox 				    NULL) {
90389ea39a7SAlan Cox 					vm_freelist_rem(alt, m, oind);
9040078df5fSDoug Moore 					vm_phys_finish_init(m, oind);
905c9b06fa5SDoug Moore 					avail = i + (1 << oind);
906c9b06fa5SDoug Moore 					end = imin(npages, avail);
907e3537f92SDoug Moore 					while (i < end)
90889ea39a7SAlan Cox 						ma[i++] = m++;
909c9b06fa5SDoug Moore 					if (i == npages) {
9107493904eSAlan Cox 						/*
9117493904eSAlan Cox 						 * Return excess pages to fl.
9127493904eSAlan Cox 						 * Its order [0, oind) queues
9137493904eSAlan Cox 						 * are empty.
9147493904eSAlan Cox 						 */
915c9b06fa5SDoug Moore 						vm_phys_enq_range(m, avail - i,
9160078df5fSDoug Moore 						    fl, pool, 1);
91789ea39a7SAlan Cox 						return (npages);
918c9b06fa5SDoug Moore 					}
91989ea39a7SAlan Cox 				}
92089ea39a7SAlan Cox 			}
92189ea39a7SAlan Cox 		}
92289ea39a7SAlan Cox 	}
92389ea39a7SAlan Cox 	return (i);
92489ea39a7SAlan Cox }
92589ea39a7SAlan Cox 
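/*
 * Editor's usage sketch, not part of the allocator: vm_phys_alloc_npages()
 * must be called with the per-domain free queue lock held, as asserted
 * above.  The wrapper name is hypothetical.
 */
static int __unused
vm_phys_alloc_npages_example(int domain, int npages, vm_page_t ma[])
{
	int got;

	vm_domain_free_lock(VM_DOMAIN(domain));
	got = vm_phys_alloc_npages(domain, VM_FREEPOOL_DEFAULT, npages, ma);
	vm_domain_free_unlock(VM_DOMAIN(domain));
	return (got);
}
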
92689ea39a7SAlan Cox /*
927d866a563SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages from the
928d866a563SAlan Cox  * specified free list.  The free list must be specified using one of the
929e3537f92SDoug Moore  * manifest constants VM_FREELIST_*.
930d866a563SAlan Cox  *
931d866a563SAlan Cox  * The free page queues must be locked.
93249ca10d4SJayachandran C.  */
9336aede562SDoug Moore static vm_page_t
9340db2102aSMichael Zhilin vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
93549ca10d4SJayachandran C. {
936ef435ae7SJeff Roberson 	struct vm_freelist *alt, *fl;
93711752d88SAlan Cox 	vm_page_t m;
9380db2102aSMichael Zhilin 	int oind, pind, flind;
93911752d88SAlan Cox 
940ef435ae7SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
941ef435ae7SJeff Roberson 	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
942ef435ae7SJeff Roberson 	    domain));
9430db2102aSMichael Zhilin 	KASSERT(freelist < VM_NFREELIST,
944d866a563SAlan Cox 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
9455be93778SAndrew Turner 	    freelist));
946b16b4c22SMark Johnston 	KASSERT(vm_phys_pool_valid(pool),
94749ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
94811752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
94949ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
9506520495aSAdrian Chadd 
9510db2102aSMichael Zhilin 	flind = vm_freelist_to_flind[freelist];
9520db2102aSMichael Zhilin 	/* Check if freelist is present */
9530db2102aSMichael Zhilin 	if (flind < 0)
9540db2102aSMichael Zhilin 		return (NULL);
9550db2102aSMichael Zhilin 
956e2068d0bSJeff Roberson 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
9577e226537SAttilio Rao 	fl = &vm_phys_free_queues[domain][flind][pool][0];
95811752d88SAlan Cox 	for (oind = order; oind < VM_NFREEORDER; oind++) {
95911752d88SAlan Cox 		m = TAILQ_FIRST(&fl[oind].pl);
96011752d88SAlan Cox 		if (m != NULL) {
9617e226537SAttilio Rao 			vm_freelist_rem(fl, m, oind);
962370a338aSAlan Cox 			/* The order [order, oind) queues are empty. */
9630078df5fSDoug Moore 			vm_phys_split_pages(m, oind, fl, order, pool, 1);
96411752d88SAlan Cox 			return (m);
96511752d88SAlan Cox 		}
96611752d88SAlan Cox 	}
96711752d88SAlan Cox 
96811752d88SAlan Cox 	/*
96911752d88SAlan Cox 	 * The given pool was empty.  Find the largest
97011752d88SAlan Cox 	 * contiguous, power-of-two-sized set of pages in any
97111752d88SAlan Cox 	 * pool.  Transfer these pages to the given pool, and
97211752d88SAlan Cox 	 * use them to satisfy the allocation.
97311752d88SAlan Cox 	 */
97411752d88SAlan Cox 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
975b16b4c22SMark Johnston 		for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
9767e226537SAttilio Rao 			alt = &vm_phys_free_queues[domain][flind][pind][0];
97711752d88SAlan Cox 			m = TAILQ_FIRST(&alt[oind].pl);
97811752d88SAlan Cox 			if (m != NULL) {
9797e226537SAttilio Rao 				vm_freelist_rem(alt, m, oind);
9800078df5fSDoug Moore 				vm_phys_finish_init(m, oind);
981370a338aSAlan Cox 				/* The order [order, oind) queues are empty. */
9820078df5fSDoug Moore 				vm_phys_split_pages(m, oind, fl, order, pool, 1);
98311752d88SAlan Cox 				return (m);
98411752d88SAlan Cox 			}
98511752d88SAlan Cox 		}
98611752d88SAlan Cox 	}
98711752d88SAlan Cox 	return (NULL);
98811752d88SAlan Cox }
98911752d88SAlan Cox 
99011752d88SAlan Cox /*
9916aede562SDoug Moore  * Allocate a contiguous, power of two-sized set of physical pages
9926aede562SDoug Moore  * from the free lists.
9936aede562SDoug Moore  *
9946aede562SDoug Moore  * The free page queues must be locked.
9956aede562SDoug Moore  */
9966aede562SDoug Moore vm_page_t
9976aede562SDoug Moore vm_phys_alloc_pages(int domain, int pool, int order)
9986aede562SDoug Moore {
9996aede562SDoug Moore 	vm_page_t m;
10006aede562SDoug Moore 	int freelist;
10016aede562SDoug Moore 
10026aede562SDoug Moore 	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
10036aede562SDoug Moore 		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
10046aede562SDoug Moore 		if (m != NULL)
10056aede562SDoug Moore 			return (m);
10066aede562SDoug Moore 	}
10076aede562SDoug Moore 	return (NULL);
10086aede562SDoug Moore }
10096aede562SDoug Moore 
10106aede562SDoug Moore /*
101169cbb187SMark Johnston  * Find the vm_page corresponding to the given physical address, which must lie
101269cbb187SMark Johnston  * within the given physical memory segment.
101369cbb187SMark Johnston  */
101469cbb187SMark Johnston vm_page_t
101569cbb187SMark Johnston vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg *seg, vm_paddr_t pa)
101669cbb187SMark Johnston {
101769cbb187SMark Johnston 	KASSERT(pa >= seg->start && pa < seg->end,
101869cbb187SMark Johnston 	    ("%s: pa %#jx is out of range", __func__, (uintmax_t)pa));
101969cbb187SMark Johnston 
102069cbb187SMark Johnston 	return (&seg->first_page[atop(pa - seg->start)]);
102169cbb187SMark Johnston }
102269cbb187SMark Johnston 
102369cbb187SMark Johnston /*
102411752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
102511752d88SAlan Cox  */
102611752d88SAlan Cox vm_page_t
102711752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
102811752d88SAlan Cox {
102911752d88SAlan Cox 	struct vm_phys_seg *seg;
103011752d88SAlan Cox 
10319e817428SDoug Moore 	if ((seg = vm_phys_paddr_to_seg(pa)) != NULL)
103269cbb187SMark Johnston 		return (vm_phys_seg_paddr_to_vm_page(seg, pa));
1033f06a3a36SAndrew Thompson 	return (NULL);
103411752d88SAlan Cox }
103511752d88SAlan Cox 
1036b6de32bdSKonstantin Belousov vm_page_t
1037b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
1038b6de32bdSKonstantin Belousov {
103938d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg tmp, *seg;
1040b6de32bdSKonstantin Belousov 	vm_page_t m;
1041b6de32bdSKonstantin Belousov 
1042b6de32bdSKonstantin Belousov 	m = NULL;
104338d6b2dcSRoger Pau Monné 	tmp.start = pa;
104438d6b2dcSRoger Pau Monné 	tmp.end = 0;
104538d6b2dcSRoger Pau Monné 
104638d6b2dcSRoger Pau Monné 	rw_rlock(&vm_phys_fictitious_reg_lock);
104738d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
104838d6b2dcSRoger Pau Monné 	rw_runlock(&vm_phys_fictitious_reg_lock);
104938d6b2dcSRoger Pau Monné 	if (seg == NULL)
105038d6b2dcSRoger Pau Monné 		return (NULL);
105138d6b2dcSRoger Pau Monné 
1052b6de32bdSKonstantin Belousov 	m = &seg->first_page[atop(pa - seg->start)];
105338d6b2dcSRoger Pau Monné 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
105438d6b2dcSRoger Pau Monné 
1055b6de32bdSKonstantin Belousov 	return (m);
1056b6de32bdSKonstantin Belousov }
1057b6de32bdSKonstantin Belousov 
10585ebe728dSRoger Pau Monné static inline void
10595ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
10605ebe728dSRoger Pau Monné     long page_count, vm_memattr_t memattr)
10615ebe728dSRoger Pau Monné {
10625ebe728dSRoger Pau Monné 	long i;
10635ebe728dSRoger Pau Monné 
1064f93f7cf1SMark Johnston 	bzero(range, page_count * sizeof(*range));
10655ebe728dSRoger Pau Monné 	for (i = 0; i < page_count; i++) {
10665ebe728dSRoger Pau Monné 		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
10675ebe728dSRoger Pau Monné 		range[i].oflags &= ~VPO_UNMANAGED;
10685ebe728dSRoger Pau Monné 		range[i].busy_lock = VPB_UNBUSIED;
10695ebe728dSRoger Pau Monné 	}
10705ebe728dSRoger Pau Monné }
10715ebe728dSRoger Pau Monné 
1072b6de32bdSKonstantin Belousov int
1073b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
1074b6de32bdSKonstantin Belousov     vm_memattr_t memattr)
1075b6de32bdSKonstantin Belousov {
1076b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
1077b6de32bdSKonstantin Belousov 	vm_page_t fp;
10785ebe728dSRoger Pau Monné 	long page_count;
1079b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
10805ebe728dSRoger Pau Monné 	long pi, pe;
10815ebe728dSRoger Pau Monné 	long dpage_count;
1082b6de32bdSKonstantin Belousov #endif
1083b6de32bdSKonstantin Belousov 
10845ebe728dSRoger Pau Monné 	KASSERT(start < end,
10855ebe728dSRoger Pau Monné 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
10865ebe728dSRoger Pau Monné 	    (uintmax_t)start, (uintmax_t)end));
10875ebe728dSRoger Pau Monné 
1088b6de32bdSKonstantin Belousov 	page_count = (end - start) / PAGE_SIZE;
1089b6de32bdSKonstantin Belousov 
1090b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
1091b6de32bdSKonstantin Belousov 	pi = atop(start);
10925ebe728dSRoger Pau Monné 	pe = atop(end);
10935ebe728dSRoger Pau Monné 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
1094b6de32bdSKonstantin Belousov 		fp = &vm_page_array[pi - first_page];
10955ebe728dSRoger Pau Monné 		if ((pe - first_page) > vm_page_array_size) {
10965ebe728dSRoger Pau Monné 			/*
10975ebe728dSRoger Pau Monné 			 * We have a segment that starts inside
10985ebe728dSRoger Pau Monné 			 * of vm_page_array, but ends outside of it.
10995ebe728dSRoger Pau Monné 			 *
11005ebe728dSRoger Pau Monné 			 * Use vm_page_array pages for those that are
11015ebe728dSRoger Pau Monné 			 * inside of the vm_page_array range, and
11025ebe728dSRoger Pau Monné 			 * allocate the remaining ones.
11035ebe728dSRoger Pau Monné 			 */
11045ebe728dSRoger Pau Monné 			dpage_count = vm_page_array_size - (pi - first_page);
11055ebe728dSRoger Pau Monné 			vm_phys_fictitious_init_range(fp, start, dpage_count,
11065ebe728dSRoger Pau Monné 			    memattr);
11075ebe728dSRoger Pau Monné 			page_count -= dpage_count;
11085ebe728dSRoger Pau Monné 			start += ptoa(dpage_count);
11095ebe728dSRoger Pau Monné 			goto alloc;
11105ebe728dSRoger Pau Monné 		}
11115ebe728dSRoger Pau Monné 		/*
11125ebe728dSRoger Pau Monné 		 * We can allocate the full range from vm_page_array,
11135ebe728dSRoger Pau Monné 		 * so there's no need to register the range in the tree.
11145ebe728dSRoger Pau Monné 		 */
11155ebe728dSRoger Pau Monné 		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
11165ebe728dSRoger Pau Monné 		return (0);
11175ebe728dSRoger Pau Monné 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
11185ebe728dSRoger Pau Monné 		/*
11195ebe728dSRoger Pau Monné 		 * We have a segment that ends inside of vm_page_array,
11205ebe728dSRoger Pau Monné 		 * but starts outside of it.
11215ebe728dSRoger Pau Monné 		 */
11225ebe728dSRoger Pau Monné 		fp = &vm_page_array[0];
11235ebe728dSRoger Pau Monné 		dpage_count = pe - first_page;
11245ebe728dSRoger Pau Monné 		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
11255ebe728dSRoger Pau Monné 		    memattr);
11265ebe728dSRoger Pau Monné 		end -= ptoa(dpage_count);
11275ebe728dSRoger Pau Monné 		page_count -= dpage_count;
11285ebe728dSRoger Pau Monné 		goto alloc;
11295ebe728dSRoger Pau Monné 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
11305ebe728dSRoger Pau Monné 		/*
11315ebe728dSRoger Pau Monné 		 * Trying to register a fictitious range that extends before
11325ebe728dSRoger Pau Monné 		 * and after vm_page_array.
11335ebe728dSRoger Pau Monné 		 */
11345ebe728dSRoger Pau Monné 		return (EINVAL);
11355ebe728dSRoger Pau Monné 	} else {
11365ebe728dSRoger Pau Monné alloc:
1137b6de32bdSKonstantin Belousov #endif
1138b6de32bdSKonstantin Belousov 		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
1139f93f7cf1SMark Johnston 		    M_WAITOK);
11405ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE
1141b6de32bdSKonstantin Belousov 	}
11425ebe728dSRoger Pau Monné #endif
11435ebe728dSRoger Pau Monné 	vm_phys_fictitious_init_range(fp, start, page_count, memattr);
114438d6b2dcSRoger Pau Monné 
114538d6b2dcSRoger Pau Monné 	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
1146b6de32bdSKonstantin Belousov 	seg->start = start;
1147b6de32bdSKonstantin Belousov 	seg->end = end;
1148b6de32bdSKonstantin Belousov 	seg->first_page = fp;
114938d6b2dcSRoger Pau Monné 
115038d6b2dcSRoger Pau Monné 	rw_wlock(&vm_phys_fictitious_reg_lock);
115138d6b2dcSRoger Pau Monné 	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
115238d6b2dcSRoger Pau Monné 	rw_wunlock(&vm_phys_fictitious_reg_lock);
115338d6b2dcSRoger Pau Monné 
1154b6de32bdSKonstantin Belousov 	return (0);
1155b6de32bdSKonstantin Belousov }
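
/*
 * Illustrative example of the VM_PHYSSEG_DENSE handling above (the numbers
 * are hypothetical): with 4 KB pages, if vm_page_array covers PFNs
 * [0x100, 0x200) and the range [0x1f0000, 0x210000) is registered, then
 * pi = 0x1f0 and pe = 0x210.  The first 0x10 pages reuse vm_page_array
 * entries, the remaining 0x10 pages are allocated from M_FICT_PAGES, and
 * only that allocated tail is inserted into the fictitious-range tree.
 */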
1156b6de32bdSKonstantin Belousov 
1157b6de32bdSKonstantin Belousov void
1158b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
1159b6de32bdSKonstantin Belousov {
116038d6b2dcSRoger Pau Monné 	struct vm_phys_fictitious_seg *seg, tmp;
1161b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
11625ebe728dSRoger Pau Monné 	long pi, pe;
1163b6de32bdSKonstantin Belousov #endif
1164b6de32bdSKonstantin Belousov 
11655ebe728dSRoger Pau Monné 	KASSERT(start < end,
11665ebe728dSRoger Pau Monné 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
11675ebe728dSRoger Pau Monné 	    (uintmax_t)start, (uintmax_t)end));
11685ebe728dSRoger Pau Monné 
1169b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
1170b6de32bdSKonstantin Belousov 	pi = atop(start);
11715ebe728dSRoger Pau Monné 	pe = atop(end);
11725ebe728dSRoger Pau Monné 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
11735ebe728dSRoger Pau Monné 		if ((pe - first_page) <= vm_page_array_size) {
11745ebe728dSRoger Pau Monné 			/*
11755ebe728dSRoger Pau Monné 			 * This segment was allocated using vm_page_array
11765ebe728dSRoger Pau Monné 			 * only; there's nothing to do since those pages
11775ebe728dSRoger Pau Monné 			 * were never added to the tree.
11785ebe728dSRoger Pau Monné 			 */
11795ebe728dSRoger Pau Monné 			return;
11805ebe728dSRoger Pau Monné 		}
11815ebe728dSRoger Pau Monné 		/*
11825ebe728dSRoger Pau Monné 		 * We have a segment that starts inside
11835ebe728dSRoger Pau Monné 		 * of vm_page_array, but ends outside of it.
11845ebe728dSRoger Pau Monné 		 *
11855ebe728dSRoger Pau Monné 		 * Calculate how many pages were added to the
11865ebe728dSRoger Pau Monné 		 * tree and free them.
11875ebe728dSRoger Pau Monné 		 */
11885ebe728dSRoger Pau Monné 		start = ptoa(first_page + vm_page_array_size);
11895ebe728dSRoger Pau Monné 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
11905ebe728dSRoger Pau Monné 		/*
11915ebe728dSRoger Pau Monné 		 * We have a segment that ends inside of vm_page_array,
11925ebe728dSRoger Pau Monné 		 * but starts outside of it.
11935ebe728dSRoger Pau Monné 		 */
11945ebe728dSRoger Pau Monné 		end = ptoa(first_page);
11955ebe728dSRoger Pau Monné 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
11965ebe728dSRoger Pau Monné 		/* Since it's not possible to register such a range, panic. */
11975ebe728dSRoger Pau Monné 		panic(
11985ebe728dSRoger Pau Monné 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
11995ebe728dSRoger Pau Monné 		    (uintmax_t)start, (uintmax_t)end);
12005ebe728dSRoger Pau Monné 	}
1201b6de32bdSKonstantin Belousov #endif
120238d6b2dcSRoger Pau Monné 	tmp.start = start;
120338d6b2dcSRoger Pau Monné 	tmp.end = 0;
1204b6de32bdSKonstantin Belousov 
120538d6b2dcSRoger Pau Monné 	rw_wlock(&vm_phys_fictitious_reg_lock);
120638d6b2dcSRoger Pau Monné 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
120738d6b2dcSRoger Pau Monné 	if (seg->start != start || seg->end != end) {
120838d6b2dcSRoger Pau Monné 		rw_wunlock(&vm_phys_fictitious_reg_lock);
120938d6b2dcSRoger Pau Monné 		panic(
121038d6b2dcSRoger Pau Monné 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
121138d6b2dcSRoger Pau Monné 		    (uintmax_t)start, (uintmax_t)end);
121238d6b2dcSRoger Pau Monné 	}
121338d6b2dcSRoger Pau Monné 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
121438d6b2dcSRoger Pau Monné 	rw_wunlock(&vm_phys_fictitious_reg_lock);
121538d6b2dcSRoger Pau Monné 	free(seg->first_page, M_FICT_PAGES);
121638d6b2dcSRoger Pau Monné 	free(seg, M_FICT_PAGES);
1217b6de32bdSKonstantin Belousov }
1218b6de32bdSKonstantin Belousov 
121911752d88SAlan Cox /*
1220e3537f92SDoug Moore  * Free a contiguous, power of two-sized set of physical pages.
12210078df5fSDoug Moore  * The "pool" argument determines the destination pool for the freed pages.
12228941dc44SAlan Cox  *
12238941dc44SAlan Cox  * The free page queues must be locked.
122411752d88SAlan Cox  */
122511752d88SAlan Cox void
12260078df5fSDoug Moore vm_phys_free_pages(vm_page_t m, int pool, int order)
122711752d88SAlan Cox {
122811752d88SAlan Cox 	struct vm_freelist *fl;
122911752d88SAlan Cox 	struct vm_phys_seg *seg;
12305c1f2cc4SAlan Cox 	vm_paddr_t pa;
123111752d88SAlan Cox 	vm_page_t m_buddy;
123211752d88SAlan Cox 
123311752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
12340078df5fSDoug Moore 	    ("%s: page %p has unexpected order %d",
12350078df5fSDoug Moore 	    __func__, m, m->order));
12360078df5fSDoug Moore 	KASSERT(vm_phys_pool_valid(pool),
12370078df5fSDoug Moore 	    ("%s: unexpected pool param %d", __func__, pool));
123811752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
12390078df5fSDoug Moore 	    ("%s: order %d is out of range", __func__, order));
124011752d88SAlan Cox 	seg = &vm_phys_segs[m->segind];
1241e2068d0bSJeff Roberson 	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
12425c1f2cc4SAlan Cox 	if (order < VM_NFREEORDER - 1) {
12435c1f2cc4SAlan Cox 		pa = VM_PAGE_TO_PHYS(m);
12445c1f2cc4SAlan Cox 		do {
12455c1f2cc4SAlan Cox 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
12465c1f2cc4SAlan Cox 			if (pa < seg->start || pa >= seg->end)
124711752d88SAlan Cox 				break;
124869cbb187SMark Johnston 			m_buddy = vm_phys_seg_paddr_to_vm_page(seg, pa);
124911752d88SAlan Cox 			if (m_buddy->order != order)
125011752d88SAlan Cox 				break;
125111752d88SAlan Cox 			fl = (*seg->free_queues)[m_buddy->pool];
12527e226537SAttilio Rao 			vm_freelist_rem(fl, m_buddy, order);
12530078df5fSDoug Moore 			vm_phys_finish_init(m_buddy, order);
125411752d88SAlan Cox 			order++;
12555c1f2cc4SAlan Cox 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
125669cbb187SMark Johnston 			m = vm_phys_seg_paddr_to_vm_page(seg, pa);
12575c1f2cc4SAlan Cox 		} while (order < VM_NFREEORDER - 1);
125811752d88SAlan Cox 	}
12590078df5fSDoug Moore 	fl = (*seg->free_queues)[pool];
12600078df5fSDoug Moore 	vm_freelist_add(fl, m, order, pool, 1);
126111752d88SAlan Cox }
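
/*
 * Illustrative buddy-coalescing example (assuming PAGE_SHIFT == 12):
 * freeing the order-0 page at 0x5000 probes its order-0 buddy at
 * 0x5000 ^ 0x1000 = 0x4000.  If that buddy is a free order-0 page, the
 * pair merges into an order-1 block at 0x4000, whose order-1 buddy is
 * 0x4000 ^ 0x2000 = 0x6000, and so on until a buddy is missing, still
 * allocated, of a different order, or outside the segment; the final
 * block is then added to the free list of the requested pool.
 */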
126211752d88SAlan Cox 
1263b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
1264b16b4c22SMark Johnston /*
1265b16b4c22SMark Johnston  * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving
1266b16b4c22SMark Johnston  * them to the default pool.  This is a prerequisite for some rare operations
1267b16b4c22SMark Johnston  * which need to scan the page array and thus depend on all pages being
1268b16b4c22SMark Johnston  * initialized.
1269b16b4c22SMark Johnston  */
1270b16b4c22SMark Johnston static void
1271b16b4c22SMark Johnston vm_phys_lazy_init_domain(int domain, bool locked)
1272b16b4c22SMark Johnston {
1273b16b4c22SMark Johnston 	static bool initdone[MAXMEMDOM];
1274b16b4c22SMark Johnston 	struct vm_domain *vmd;
1275b16b4c22SMark Johnston 	struct vm_freelist *fl;
1276b16b4c22SMark Johnston 	vm_page_t m;
1277b16b4c22SMark Johnston 	int pind;
1278b16b4c22SMark Johnston 	bool unlocked;
1279b16b4c22SMark Johnston 
1280b16b4c22SMark Johnston 	if (__predict_true(atomic_load_bool(&initdone[domain])))
1281b16b4c22SMark Johnston 		return;
1282b16b4c22SMark Johnston 
1283b16b4c22SMark Johnston 	vmd = VM_DOMAIN(domain);
1284b16b4c22SMark Johnston 	if (locked)
1285b16b4c22SMark Johnston 		vm_domain_free_assert_locked(vmd);
1286b16b4c22SMark Johnston 	else
1287b16b4c22SMark Johnston 		vm_domain_free_lock(vmd);
1288b16b4c22SMark Johnston 	if (atomic_load_bool(&initdone[domain]))
1289b16b4c22SMark Johnston 		goto out;
1290b16b4c22SMark Johnston 	pind = VM_FREEPOOL_LAZYINIT;
1291b16b4c22SMark Johnston 	for (int freelist = 0; freelist < VM_NFREELIST; freelist++) {
1292b16b4c22SMark Johnston 		int flind;
1293b16b4c22SMark Johnston 
1294b16b4c22SMark Johnston 		flind = vm_freelist_to_flind[freelist];
1295b16b4c22SMark Johnston 		if (flind < 0)
1296b16b4c22SMark Johnston 			continue;
1297b16b4c22SMark Johnston 		fl = vm_phys_free_queues[domain][flind][pind];
1298b16b4c22SMark Johnston 		for (int oind = 0; oind < VM_NFREEORDER; oind++) {
1299b16b4c22SMark Johnston 			if (atomic_load_int(&fl[oind].lcnt) == 0)
1300b16b4c22SMark Johnston 				continue;
1301b16b4c22SMark Johnston 			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
1302b16b4c22SMark Johnston 				/*
1303b16b4c22SMark Johnston 				 * Avoid holding the lock across the
1304b16b4c22SMark Johnston 				 * initialization unless there's a free page
1305b16b4c22SMark Johnston 				 * shortage.
1306b16b4c22SMark Johnston 				 */
1307b16b4c22SMark Johnston 				vm_freelist_rem(fl, m, oind);
1308b16b4c22SMark Johnston 				unlocked = vm_domain_allocate(vmd,
1309b16b4c22SMark Johnston 				    VM_ALLOC_NORMAL, 1 << oind);
1310b16b4c22SMark Johnston 				if (unlocked)
1311b16b4c22SMark Johnston 					vm_domain_free_unlock(vmd);
13120078df5fSDoug Moore 				vm_phys_finish_init(m, oind);
1313b16b4c22SMark Johnston 				if (unlocked) {
1314b16b4c22SMark Johnston 					vm_domain_freecnt_inc(vmd, 1 << oind);
1315b16b4c22SMark Johnston 					vm_domain_free_lock(vmd);
1316b16b4c22SMark Johnston 				}
13170078df5fSDoug Moore 				vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT,
13180078df5fSDoug Moore 				    oind);
1319b16b4c22SMark Johnston 			}
1320b16b4c22SMark Johnston 		}
1321b16b4c22SMark Johnston 	}
1322b16b4c22SMark Johnston 	atomic_store_bool(&initdone[domain], true);
1323b16b4c22SMark Johnston out:
1324b16b4c22SMark Johnston 	if (!locked)
1325b16b4c22SMark Johnston 		vm_domain_free_unlock(vmd);
1326b16b4c22SMark Johnston }
1327b16b4c22SMark Johnston 
1328b16b4c22SMark Johnston static void
1329b16b4c22SMark Johnston vm_phys_lazy_init(void)
1330b16b4c22SMark Johnston {
1331b16b4c22SMark Johnston 	for (int domain = 0; domain < vm_ndomains; domain++)
1332b16b4c22SMark Johnston 		vm_phys_lazy_init_domain(domain, false);
1333b16b4c22SMark Johnston 	atomic_store_int(&vm_default_freepool, VM_FREEPOOL_DEFAULT);
1334b16b4c22SMark Johnston }
1335b16b4c22SMark Johnston 
1336b16b4c22SMark Johnston static void
1337b16b4c22SMark Johnston vm_phys_lazy_init_kthr(void *arg __unused)
1338b16b4c22SMark Johnston {
1339b16b4c22SMark Johnston 	vm_phys_lazy_init();
1340b16b4c22SMark Johnston 	kthread_exit();
1341b16b4c22SMark Johnston }
1342b16b4c22SMark Johnston 
1343b16b4c22SMark Johnston static void
1344b16b4c22SMark Johnston vm_phys_lazy_sysinit(void *arg __unused)
1345b16b4c22SMark Johnston {
1346b16b4c22SMark Johnston 	struct thread *td;
1347b16b4c22SMark Johnston 	int error;
1348b16b4c22SMark Johnston 
1349b16b4c22SMark Johnston 	error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td,
1350b16b4c22SMark Johnston 	    RFSTOPPED, 0, "vmlazyinit");
1351b16b4c22SMark Johnston 	if (error == 0) {
1352b16b4c22SMark Johnston 		thread_lock(td);
1353b16b4c22SMark Johnston 		sched_prio(td, PRI_MIN_IDLE);
1354b16b4c22SMark Johnston 		sched_add(td, SRQ_BORING);
1355b16b4c22SMark Johnston 	} else {
1356b16b4c22SMark Johnston 		printf("%s: could not create lazy init thread: %d\n",
1357b16b4c22SMark Johnston 		    __func__, error);
1358b16b4c22SMark Johnston 		vm_phys_lazy_init();
1359b16b4c22SMark Johnston 	}
1360b16b4c22SMark Johnston }
1361b16b4c22SMark Johnston SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit,
1362b16b4c22SMark Johnston     NULL);
1363b16b4c22SMark Johnston #endif /* VM_FREEPOOL_LAZYINIT */
1364b16b4c22SMark Johnston 
136511752d88SAlan Cox /*
1366e3537f92SDoug Moore  * Free a contiguous, arbitrarily sized set of physical pages, without
13670078df5fSDoug Moore  * merging across set boundaries.  Assumes no pages have a valid pool field.
1368b8590daeSDoug Moore  *
1369b8590daeSDoug Moore  * The free page queues must be locked.
1370b8590daeSDoug Moore  */
1371b8590daeSDoug Moore void
13720078df5fSDoug Moore vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages)
1373b8590daeSDoug Moore {
1374b8590daeSDoug Moore 	struct vm_freelist *fl;
1375b8590daeSDoug Moore 	struct vm_phys_seg *seg;
1376b8590daeSDoug Moore 	vm_page_t m_end;
1377c9b06fa5SDoug Moore 	vm_paddr_t diff, lo;
1378b8590daeSDoug Moore 	int order;
1379b8590daeSDoug Moore 
1380b8590daeSDoug Moore 	/*
1381b8590daeSDoug Moore 	 * Avoid unnecessary coalescing by freeing the pages in the largest
1382b8590daeSDoug Moore 	 * possible power-of-two-sized subsets.
1383b8590daeSDoug Moore 	 */
1384b8590daeSDoug Moore 	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1385b8590daeSDoug Moore 	seg = &vm_phys_segs[m->segind];
13860078df5fSDoug Moore 	fl = (*seg->free_queues)[pool];
1387b8590daeSDoug Moore 	m_end = m + npages;
1388b8590daeSDoug Moore 	/* Free blocks of increasing size. */
13896dd15b7aSDoug Moore 	lo = atop(VM_PAGE_TO_PHYS(m));
1390c9b06fa5SDoug Moore 	if (m < m_end &&
1391c9b06fa5SDoug Moore 	    (diff = lo ^ (lo + npages - 1)) != 0) {
1392543d55d7SDoug Moore 		order = min(ilog2(diff), VM_NFREEORDER - 1);
13930078df5fSDoug Moore 		m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl,
13940078df5fSDoug Moore 		    pool, 1);
13955c1f2cc4SAlan Cox 	}
1396c9b06fa5SDoug Moore 
1397b8590daeSDoug Moore 	/* Free blocks of maximum size. */
1398c9b06fa5SDoug Moore 	order = VM_NFREEORDER - 1;
1399b8590daeSDoug Moore 	while (m + (1 << order) <= m_end) {
1400b8590daeSDoug Moore 		KASSERT(seg == &vm_phys_segs[m->segind],
1401b8590daeSDoug Moore 		    ("%s: page range [%p,%p) spans multiple segments",
1402b8590daeSDoug Moore 		    __func__, m_end - npages, m));
14030078df5fSDoug Moore 		vm_phys_enq_chunk(fl, m, order, pool, 1);
1404b8590daeSDoug Moore 		m += 1 << order;
1405b8590daeSDoug Moore 	}
1406b8590daeSDoug Moore 	/* Free blocks of diminishing size. */
14070078df5fSDoug Moore 	vm_phys_enq_beg(m, m_end - m, fl, pool, 1);
1408b8590daeSDoug Moore }
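
/*
 * Illustrative example (hypothetical numbers): enqueueing npages = 10
 * starting at PFN 3 yields lo = 3 and diff = 3 ^ 12 = 15, so order = 3
 * and vm_phys_enq_range() frees PFNs [3, 8) in blocks of increasing size
 * (an order-0 block at 3, an order-2 block at 4).  No maximum-order block
 * fits in the remainder, so vm_phys_enq_beg() frees PFNs [8, 13) in blocks
 * of diminishing size (an order-2 block at 8, an order-0 block at 12).
 */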
1409b8590daeSDoug Moore 
1410b8590daeSDoug Moore /*
1411b8590daeSDoug Moore  * Free a contiguous, arbitrarily sized set of physical pages.
14120078df5fSDoug Moore  * Assumes that every page but the first has no valid pool field.
14130078df5fSDoug Moore  * The "pool" argument determines the destination pool for all of the pages.
1414b8590daeSDoug Moore  *
1415b8590daeSDoug Moore  * The free page queues must be locked.
1416b8590daeSDoug Moore  */
1417b8590daeSDoug Moore void
14180078df5fSDoug Moore vm_phys_free_contig(vm_page_t m, int pool, u_long npages)
1419b8590daeSDoug Moore {
14206dd15b7aSDoug Moore 	vm_paddr_t lo;
1421b8590daeSDoug Moore 	vm_page_t m_start, m_end;
14226dd15b7aSDoug Moore 	unsigned max_order, order_start, order_end;
1423b8590daeSDoug Moore 
1424b8590daeSDoug Moore 	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1425b8590daeSDoug Moore 
14266dd15b7aSDoug Moore 	lo = atop(VM_PAGE_TO_PHYS(m));
1427543d55d7SDoug Moore 	max_order = min(ilog2(lo ^ (lo + npages)), VM_NFREEORDER - 1);
1428e3537f92SDoug Moore 
1429e3537f92SDoug Moore 	m_start = m;
14306dd15b7aSDoug Moore 	order_start = ffsll(lo) - 1;
14316dd15b7aSDoug Moore 	if (order_start < max_order)
1432b8590daeSDoug Moore 		m_start += 1 << order_start;
1433e3537f92SDoug Moore 	m_end = m + npages;
14346dd15b7aSDoug Moore 	order_end = ffsll(lo + npages) - 1;
14356dd15b7aSDoug Moore 	if (order_end < max_order)
1436b8590daeSDoug Moore 		m_end -= 1 << order_end;
1437b8590daeSDoug Moore 	/*
1438b8590daeSDoug Moore 	 * Avoid unnecessary coalescing by freeing the pages at the start and
1439b8590daeSDoug Moore 	 * end of the range last.
1440b8590daeSDoug Moore 	 */
1441b8590daeSDoug Moore 	if (m_start < m_end)
14420078df5fSDoug Moore 		vm_phys_enqueue_contig(m_start, pool, m_end - m_start);
1443e3537f92SDoug Moore 	if (order_start < max_order)
14440078df5fSDoug Moore 		vm_phys_free_pages(m, pool, order_start);
1445e3537f92SDoug Moore 	if (order_end < max_order)
14460078df5fSDoug Moore 		vm_phys_free_pages(m_end, pool, order_end);
14475c1f2cc4SAlan Cox }
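
/*
 * Illustrative example (hypothetical numbers): freeing npages = 11
 * starting at PFN 5 yields max_order = min(ilog2(5 ^ 16),
 * VM_NFREEORDER - 1) = 4, order_start = 0, and order_end = 4.  Because
 * order_end equals max_order, no separate tail piece is split off; the
 * bulk PFNs [6, 16) are enqueued first, without coalescing, and the
 * single order-0 page at PFN 5 is freed last, where it may coalesce with
 * its buddy.
 */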
14485c1f2cc4SAlan Cox 
14495c1f2cc4SAlan Cox /*
14509e817428SDoug Moore  * Identify the first address range within segment segind or greater
14519e817428SDoug Moore  * that matches the domain, lies within the low/high range, and has
14529e817428SDoug Moore  * enough pages.  Return -1 if there is none.
1453c869e672SAlan Cox  */
14549e817428SDoug Moore int
14559e817428SDoug Moore vm_phys_find_range(vm_page_t bounds[], int segind, int domain,
14569e817428SDoug Moore     u_long npages, vm_paddr_t low, vm_paddr_t high)
1457c869e672SAlan Cox {
14589e817428SDoug Moore 	vm_paddr_t pa_end, pa_start;
14599e817428SDoug Moore 	struct vm_phys_seg *end_seg, *seg;
1460c869e672SAlan Cox 
14619e817428SDoug Moore 	KASSERT(npages > 0, ("npages is zero"));
146258d42717SAlan Cox 	KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range"));
14639e817428SDoug Moore 	end_seg = &vm_phys_segs[vm_phys_nsegs];
14649e817428SDoug Moore 	for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) {
14653f289c3fSJeff Roberson 		if (seg->domain != domain)
14663f289c3fSJeff Roberson 			continue;
1467c869e672SAlan Cox 		if (seg->start >= high)
14689e817428SDoug Moore 			return (-1);
14699e817428SDoug Moore 		pa_start = MAX(low, seg->start);
14709e817428SDoug Moore 		pa_end = MIN(high, seg->end);
14719e817428SDoug Moore 		if (pa_end - pa_start < ptoa(npages))
1472c869e672SAlan Cox 			continue;
1473b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
1474b16b4c22SMark Johnston 		/*
1475b16b4c22SMark Johnston 		 * The pages on the free lists must be initialized.
1476b16b4c22SMark Johnston 		 */
1477b16b4c22SMark Johnston 		vm_phys_lazy_init_domain(domain, false);
1478b16b4c22SMark Johnston #endif
147969cbb187SMark Johnston 		bounds[0] = vm_phys_seg_paddr_to_vm_page(seg, pa_start);
1480fbff6d54SMark Johnston 		bounds[1] = &seg->first_page[atop(pa_end - seg->start)];
14819e817428SDoug Moore 		return (seg - vm_phys_segs);
1482c869e672SAlan Cox 	}
14839e817428SDoug Moore 	return (-1);
1484c869e672SAlan Cox }
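
/*
 * A possible calling pattern (a sketch, not a reference to any specific
 * in-tree caller): restart the search at the next segment index after
 * each hit in order to visit every matching range in the domain.
 *
 *	for (segind = 0; (segind = vm_phys_find_range(bounds, segind,
 *	    domain, npages, low, high)) != -1; segind++) {
 *		... scan the pages in [bounds[0], bounds[1]) ...
 *	}
 */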
1485c869e672SAlan Cox 
1486c869e672SAlan Cox /*
14879742373aSAlan Cox  * Search the free lists for the physical page "m" at the given address "pa".
14886062d9faSMark Johnston  * If the search succeeds, remove "m" from the free lists and return true.
14896062d9faSMark Johnston  * Otherwise, return false, indicating that "m" is not in the free lists.
14907bfda801SAlan Cox  *
14917bfda801SAlan Cox  * The free page queues must be locked.
14927bfda801SAlan Cox  */
14936062d9faSMark Johnston bool
1494b16b4c22SMark Johnston vm_phys_unfree_page(vm_paddr_t pa)
14957bfda801SAlan Cox {
14967bfda801SAlan Cox 	struct vm_freelist *fl;
14977bfda801SAlan Cox 	struct vm_phys_seg *seg;
1498b16b4c22SMark Johnston 	vm_paddr_t pa_half;
1499b16b4c22SMark Johnston 	vm_page_t m, m_set, m_tmp;
15000078df5fSDoug Moore 	int order, pool;
15017bfda801SAlan Cox 
1502b16b4c22SMark Johnston 	seg = vm_phys_paddr_to_seg(pa);
1503b16b4c22SMark Johnston 	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
1504b16b4c22SMark Johnston 
15050078df5fSDoug Moore #ifdef VM_FREEPOOL_LAZYINIT
1506b16b4c22SMark Johnston 	/*
1507b16b4c22SMark Johnston 	 * The pages on the free lists must be initialized.
1508b16b4c22SMark Johnston 	 */
1509b16b4c22SMark Johnston 	vm_phys_lazy_init_domain(seg->domain, true);
1510b16b4c22SMark Johnston #endif
1511b16b4c22SMark Johnston 
15127bfda801SAlan Cox 	/*
15137bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
15147bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
15157bfda801SAlan Cox 	 * assign it to "m_set".
15167bfda801SAlan Cox 	 */
1517b16b4c22SMark Johnston 	m = vm_phys_paddr_to_vm_page(pa);
15187bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
1519bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
15207bfda801SAlan Cox 		order++;
15217bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
15222fbced65SAlan Cox 		if (pa >= seg->start)
152369cbb187SMark Johnston 			m_set = vm_phys_seg_paddr_to_vm_page(seg, pa);
1524e35395ceSAlan Cox 		else
15256062d9faSMark Johnston 			return (false);
15267bfda801SAlan Cox 	}
1527e35395ceSAlan Cox 	if (m_set->order < order)
15286062d9faSMark Johnston 		return (false);
1529e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
15306062d9faSMark Johnston 		return (false);
15317bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
15327bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
15337bfda801SAlan Cox 	    m_set, m_set->order));
15347bfda801SAlan Cox 
15357bfda801SAlan Cox 	/*
15367bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
15377bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
15387bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
15397bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
15407bfda801SAlan Cox 	 */
15410078df5fSDoug Moore 	pool = m_set->pool;
15420078df5fSDoug Moore 	fl = (*seg->free_queues)[pool];
15437bfda801SAlan Cox 	order = m_set->order;
15447e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
15457bfda801SAlan Cox 	while (order > 0) {
15467bfda801SAlan Cox 		order--;
15477bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
15487bfda801SAlan Cox 		if (m->phys_addr < pa_half)
154969cbb187SMark Johnston 			m_tmp = vm_phys_seg_paddr_to_vm_page(seg, pa_half);
15507bfda801SAlan Cox 		else {
15517bfda801SAlan Cox 			m_tmp = m_set;
155269cbb187SMark Johnston 			m_set = vm_phys_seg_paddr_to_vm_page(seg, pa_half);
15537bfda801SAlan Cox 		}
15540078df5fSDoug Moore 		vm_freelist_add(fl, m_tmp, order, pool, 0);
15557bfda801SAlan Cox 	}
15567bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
15576062d9faSMark Johnston 	return (true);
15587bfda801SAlan Cox }
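
/*
 * Illustrative example (assuming PAGE_SHIFT == 12): if PFN 10 lies within
 * a free order-2 block starting at PFN 8, the whole block is removed from
 * its free list and split down: at order 1 the half not containing PFN 10
 * (PFNs 8-9) is returned to the free list, at order 0 the remaining buddy
 * (PFN 11) is returned, and exactly PFN 10 is left allocated.
 */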
15597bfda801SAlan Cox 
15607bfda801SAlan Cox /*
15612a4897bdSDoug Moore  * Find a run of contiguous physical pages, meeting alignment requirements, from
15622a4897bdSDoug Moore  * a list of max-sized page blocks, where we need at least two consecutive
15632a4897bdSDoug Moore  * blocks to satisfy the (large) page request.
1564fa8a6585SDoug Moore  */
1565fa8a6585SDoug Moore static vm_page_t
15662a4897bdSDoug Moore vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
1567fa8a6585SDoug Moore     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
1568fa8a6585SDoug Moore {
1569fa8a6585SDoug Moore 	struct vm_phys_seg *seg;
15702a4897bdSDoug Moore 	vm_page_t m, m_iter, m_ret;
15712a4897bdSDoug Moore 	vm_paddr_t max_size, size;
15722a4897bdSDoug Moore 	int max_order;
1573fa8a6585SDoug Moore 
15742a4897bdSDoug Moore 	max_order = VM_NFREEORDER - 1;
1575fa8a6585SDoug Moore 	size = npages << PAGE_SHIFT;
15762a4897bdSDoug Moore 	max_size = (vm_paddr_t)1 << (PAGE_SHIFT + max_order);
15772a4897bdSDoug Moore 	KASSERT(size > max_size, ("size is too small"));
15782a4897bdSDoug Moore 
1579fa8a6585SDoug Moore 	/*
15802a4897bdSDoug Moore 	 * In order to avoid examining any free max-sized page block more than
15812a4897bdSDoug Moore 	 * twice, identify the ones that are first in a physically-contiguous
15822a4897bdSDoug Moore 	 * sequence of such blocks, and only for those walk the sequence to
15832a4897bdSDoug Moore 	 * check if there are enough free blocks starting at a properly aligned
15842a4897bdSDoug Moore 	 * block.  Thus, no block is checked for free-ness more than twice.
1585fa8a6585SDoug Moore 	 */
15862a4897bdSDoug Moore 	TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
15872a4897bdSDoug Moore 		/*
15882a4897bdSDoug Moore 		 * Skip m unless it is first in a sequence of free max page
15892a4897bdSDoug Moore 		 * blocks >= low in its segment.
15902a4897bdSDoug Moore 		 */
15912a4897bdSDoug Moore 		seg = &vm_phys_segs[m->segind];
15922a4897bdSDoug Moore 		if (VM_PAGE_TO_PHYS(m) < MAX(low, seg->start))
15932a4897bdSDoug Moore 			continue;
15942a4897bdSDoug Moore 		if (VM_PAGE_TO_PHYS(m) >= max_size &&
15952a4897bdSDoug Moore 		    VM_PAGE_TO_PHYS(m) - max_size >= MAX(low, seg->start) &&
15962a4897bdSDoug Moore 		    max_order == m[-1 << max_order].order)
1597fa8a6585SDoug Moore 			continue;
1598fa8a6585SDoug Moore 
1599fa8a6585SDoug Moore 		/*
16002a4897bdSDoug Moore 		 * Advance m_ret from m to the first of the sequence, if any,
16012a4897bdSDoug Moore 		 * that satisfies alignment conditions and might leave enough
16022a4897bdSDoug Moore 		 * space.
1603fa8a6585SDoug Moore 		 */
16042a4897bdSDoug Moore 		m_ret = m;
16052a4897bdSDoug Moore 		while (!vm_addr_ok(VM_PAGE_TO_PHYS(m_ret),
16062a4897bdSDoug Moore 		    size, alignment, boundary) &&
16072a4897bdSDoug Moore 		    VM_PAGE_TO_PHYS(m_ret) + size <= MIN(high, seg->end) &&
16082a4897bdSDoug Moore 		    max_order == m_ret[1 << max_order].order)
16092a4897bdSDoug Moore 			m_ret += 1 << max_order;
16102a4897bdSDoug Moore 
16112a4897bdSDoug Moore 		/*
16122a4897bdSDoug Moore 		 * Skip m unless some block m_ret in the sequence is properly
16132a4897bdSDoug Moore 		 * aligned, and begins a sequence of enough pages less than
16142a4897bdSDoug Moore 		 * high, and in the same segment.
16152a4897bdSDoug Moore 		 */
16162a4897bdSDoug Moore 		if (VM_PAGE_TO_PHYS(m_ret) + size > MIN(high, seg->end))
1617fa8a6585SDoug Moore 			continue;
1618fa8a6585SDoug Moore 
1619fa8a6585SDoug Moore 		/*
16202a4897bdSDoug Moore 		 * Skip m unless the blocks to allocate starting at m_ret are
16212a4897bdSDoug Moore 		 * all free.
1622fa8a6585SDoug Moore 		 */
16232a4897bdSDoug Moore 		for (m_iter = m_ret;
16242a4897bdSDoug Moore 		    m_iter < m_ret + npages && max_order == m_iter->order;
16252a4897bdSDoug Moore 		    m_iter += 1 << max_order) {
1626fa8a6585SDoug Moore 		}
16272a4897bdSDoug Moore 		if (m_iter < m_ret + npages)
1628fa8a6585SDoug Moore 			continue;
1629fa8a6585SDoug Moore 		return (m_ret);
1630fa8a6585SDoug Moore 	}
1631fa8a6585SDoug Moore 	return (NULL);
1632fa8a6585SDoug Moore }
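
/*
 * Illustrative example: if VM_NFREEORDER were 13, a max-sized block would
 * span 4096 pages, and a request for npages = 5000 could only be satisfied
 * by two physically consecutive free max-sized blocks whose combined span
 * contains a suitably aligned 5000-page run within [low, high).
 */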
1633fa8a6585SDoug Moore 
1634fa8a6585SDoug Moore /*
1635fa8a6585SDoug Moore  * Find a run of contiguous physical pages from the specified free list
1636342056faSDoug Moore  * table.
1637c869e672SAlan Cox  */
1638c869e672SAlan Cox static vm_page_t
1639fa8a6585SDoug Moore vm_phys_find_queues_contig(
1640342056faSDoug Moore     struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX],
1641342056faSDoug Moore     u_long npages, vm_paddr_t low, vm_paddr_t high,
1642342056faSDoug Moore     u_long alignment, vm_paddr_t boundary)
1643c869e672SAlan Cox {
1644c869e672SAlan Cox 	struct vm_freelist *fl;
1645fa8a6585SDoug Moore 	vm_page_t m_ret;
1646c869e672SAlan Cox 	vm_paddr_t pa, pa_end, size;
1647c869e672SAlan Cox 	int oind, order, pind;
1648c869e672SAlan Cox 
1649c869e672SAlan Cox 	KASSERT(npages > 0, ("npages is 0"));
1650c869e672SAlan Cox 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1651c869e672SAlan Cox 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1652c869e672SAlan Cox 	/* Compute the queue that is the best fit for npages. */
16539161b4deSAlan Cox 	order = flsl(npages - 1);
1654fa8a6585SDoug Moore 	/* Search for a large enough free block. */
1655c869e672SAlan Cox 	size = npages << PAGE_SHIFT;
1656fa8a6585SDoug Moore 	for (oind = order; oind < VM_NFREEORDER; oind++) {
1657b16b4c22SMark Johnston 		for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
1658342056faSDoug Moore 			fl = (*queues)[pind];
16595cd29d0fSMark Johnston 			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
1660c869e672SAlan Cox 				/*
1661da92ecbcSDoug Moore 				 * Determine if the address range starting at pa
1662da92ecbcSDoug Moore 				 * is within the given range, satisfies the
1663da92ecbcSDoug Moore 				 * given alignment, and does not cross the given
1664da92ecbcSDoug Moore 				 * boundary.
166511752d88SAlan Cox 				 */
1666da92ecbcSDoug Moore 				pa = VM_PAGE_TO_PHYS(m_ret);
1667da92ecbcSDoug Moore 				pa_end = pa + size;
1668fa8a6585SDoug Moore 				if (low <= pa && pa_end <= high &&
1669fa8a6585SDoug Moore 				    vm_addr_ok(pa, size, alignment, boundary))
1670fa8a6585SDoug Moore 					return (m_ret);
1671fa8a6585SDoug Moore 			}
1672fa8a6585SDoug Moore 		}
1673fa8a6585SDoug Moore 	}
1674da92ecbcSDoug Moore 	if (order < VM_NFREEORDER)
1675fa8a6585SDoug Moore 		return (NULL);
16762a4897bdSDoug Moore 	/* Search for a long-enough sequence of max-order blocks. */
1677b16b4c22SMark Johnston 	for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
1678fa8a6585SDoug Moore 		fl = (*queues)[pind];
16792a4897bdSDoug Moore 		m_ret = vm_phys_find_freelist_contig(fl, npages,
1680fa8a6585SDoug Moore 		    low, high, alignment, boundary);
1681fa8a6585SDoug Moore 		if (m_ret != NULL)
1682fa8a6585SDoug Moore 			return (m_ret);
168311752d88SAlan Cox 	}
168411752d88SAlan Cox 	return (NULL);
168511752d88SAlan Cox }
168611752d88SAlan Cox 
1687b7565d44SJeff Roberson /*
1688342056faSDoug Moore  * Allocate a contiguous set of physical pages of the given size
1689342056faSDoug Moore  * "npages" from the free lists.  All of the physical pages must be at
1690342056faSDoug Moore  * or above the given physical address "low" and below the given
1691342056faSDoug Moore  * physical address "high".  The given value "alignment" determines the
1692342056faSDoug Moore  * alignment of the first physical page in the set.  If the given value
1693342056faSDoug Moore  * "boundary" is non-zero, then the set of physical pages cannot cross
1694342056faSDoug Moore  * any physical address boundary that is a multiple of that value.  Both
16950078df5fSDoug Moore  * "alignment" and "boundary" must be a power of two.  Sets the pool
16960078df5fSDoug Moore  * field to DEFAULT in the first allocated page.
1697342056faSDoug Moore  */
1698342056faSDoug Moore vm_page_t
1699342056faSDoug Moore vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
1700342056faSDoug Moore     u_long alignment, vm_paddr_t boundary)
1701342056faSDoug Moore {
1702342056faSDoug Moore 	vm_paddr_t pa_end, pa_start;
1703fa8a6585SDoug Moore 	struct vm_freelist *fl;
1704fa8a6585SDoug Moore 	vm_page_t m, m_run;
1705342056faSDoug Moore 	struct vm_phys_seg *seg;
1706342056faSDoug Moore 	struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX];
1707fa8a6585SDoug Moore 	int oind, segind;
1708342056faSDoug Moore 
1709342056faSDoug Moore 	KASSERT(npages > 0, ("npages is 0"));
1710342056faSDoug Moore 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1711342056faSDoug Moore 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1712342056faSDoug Moore 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
1713342056faSDoug Moore 	if (low >= high)
1714342056faSDoug Moore 		return (NULL);
1715342056faSDoug Moore 	queues = NULL;
1716342056faSDoug Moore 	m_run = NULL;
1717342056faSDoug Moore 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
1718342056faSDoug Moore 		seg = &vm_phys_segs[segind];
1719342056faSDoug Moore 		if (seg->start >= high || seg->domain != domain)
1720342056faSDoug Moore 			continue;
1721342056faSDoug Moore 		if (low >= seg->end)
1722342056faSDoug Moore 			break;
1723342056faSDoug Moore 		if (low <= seg->start)
1724342056faSDoug Moore 			pa_start = seg->start;
1725342056faSDoug Moore 		else
1726342056faSDoug Moore 			pa_start = low;
1727342056faSDoug Moore 		if (high < seg->end)
1728342056faSDoug Moore 			pa_end = high;
1729342056faSDoug Moore 		else
1730342056faSDoug Moore 			pa_end = seg->end;
1731342056faSDoug Moore 		if (pa_end - pa_start < ptoa(npages))
1732342056faSDoug Moore 			continue;
1733342056faSDoug Moore 		/*
1734342056faSDoug Moore 		 * If a previous segment led to a search using
1735342056faSDoug Moore 		 * the same free lists as would this segment, then
1736342056faSDoug Moore 		 * we've actually already searched within this
1737342056faSDoug Moore 		 * too.  So skip it.
1738342056faSDoug Moore 		 */
1739342056faSDoug Moore 		if (seg->free_queues == queues)
1740342056faSDoug Moore 			continue;
1741342056faSDoug Moore 		queues = seg->free_queues;
1742fa8a6585SDoug Moore 		m_run = vm_phys_find_queues_contig(queues, npages,
1743342056faSDoug Moore 		    low, high, alignment, boundary);
1744342056faSDoug Moore 		if (m_run != NULL)
1745342056faSDoug Moore 			break;
1746342056faSDoug Moore 	}
1747fa8a6585SDoug Moore 	if (m_run == NULL)
1748fa8a6585SDoug Moore 		return (NULL);
1749fa8a6585SDoug Moore 
1750fa8a6585SDoug Moore 	/* Allocate pages from the page-range found. */
1751fa8a6585SDoug Moore 	for (m = m_run; m < &m_run[npages]; m = &m[1 << oind]) {
1752fa8a6585SDoug Moore 		fl = (*queues)[m->pool];
1753fa8a6585SDoug Moore 		oind = m->order;
1754fa8a6585SDoug Moore 		vm_freelist_rem(fl, m, oind);
17550078df5fSDoug Moore 		vm_phys_finish_init(m, oind);
1756fa8a6585SDoug Moore 	}
1757fa8a6585SDoug Moore 	/* Return excess pages to the free lists. */
1758fa8a6585SDoug Moore 	fl = (*queues)[VM_FREEPOOL_DEFAULT];
17590078df5fSDoug Moore 	vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl,
17600078df5fSDoug Moore 	    VM_FREEPOOL_DEFAULT, 0);
17612a4897bdSDoug Moore 
17622a4897bdSDoug Moore 	/* Return page verified to satisfy conditions of request. */
17632a4897bdSDoug Moore 	pa_start = VM_PAGE_TO_PHYS(m_run);
17642a4897bdSDoug Moore 	KASSERT(low <= pa_start,
17652a4897bdSDoug Moore 	    ("memory allocated below minimum requested range"));
17662a4897bdSDoug Moore 	KASSERT(pa_start + ptoa(npages) <= high,
17672a4897bdSDoug Moore 	    ("memory allocated above maximum requested range"));
17682a4897bdSDoug Moore 	seg = &vm_phys_segs[m_run->segind];
17692a4897bdSDoug Moore 	KASSERT(seg->domain == domain,
17702a4897bdSDoug Moore 	    ("memory not allocated from specified domain"));
17712a4897bdSDoug Moore 	KASSERT(vm_addr_ok(pa_start, ptoa(npages), alignment, boundary),
17722a4897bdSDoug Moore 	    ("memory alignment/boundary constraints not satisfied"));
1773342056faSDoug Moore 	return (m_run);
1774342056faSDoug Moore }
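
/*
 * Illustrative example: a request for npages = 3 computes order =
 * flsl(2) = 2, so the search starts with order-2 (4-page) blocks.  When
 * such a block is found, all four pages are removed from the free lists,
 * the first three satisfy the request, and the excess fourth page is
 * returned to the default pool by vm_phys_enq_range().
 */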
1775342056faSDoug Moore 
1776342056faSDoug Moore /*
1777b7565d44SJeff Roberson  * Return the index of the first unused slot which may be the terminating
1778b7565d44SJeff Roberson  * entry.
1779b7565d44SJeff Roberson  */
1780b7565d44SJeff Roberson static int
1781b7565d44SJeff Roberson vm_phys_avail_count(void)
1782b7565d44SJeff Roberson {
1783b7565d44SJeff Roberson 	int i;
1784b7565d44SJeff Roberson 
1785291b7bf0SOlivier Certner 	for (i = 0; i < PHYS_AVAIL_COUNT; i += 2)
1786291b7bf0SOlivier Certner 		if (phys_avail[i] == 0 && phys_avail[i + 1] == 0)
1787b7565d44SJeff Roberson 			return (i);
1788291b7bf0SOlivier Certner 	panic("Improperly terminated phys_avail[]");
1789b7565d44SJeff Roberson }
1790b7565d44SJeff Roberson 
1791b7565d44SJeff Roberson /*
1792b7565d44SJeff Roberson  * Assert that a phys_avail entry is valid.
1793b7565d44SJeff Roberson  */
1794b7565d44SJeff Roberson static void
1795b7565d44SJeff Roberson vm_phys_avail_check(int i)
1796b7565d44SJeff Roberson {
1797125ef4e0SOlivier Certner 	if (i % 2 != 0)
1798125ef4e0SOlivier Certner 		panic("Chunk start index %d is not even.", i);
1799b7565d44SJeff Roberson 	if (phys_avail[i] & PAGE_MASK)
1800b7565d44SJeff Roberson 		panic("Unaligned phys_avail[%d]: %#jx", i,
1801b7565d44SJeff Roberson 		    (intmax_t)phys_avail[i]);
1802b7565d44SJeff Roberson 	if (phys_avail[i + 1] & PAGE_MASK)
1803b7565d44SJeff Roberson 		panic("Unaligned phys_avail[%d + 1]: %#jx", i,
1804125ef4e0SOlivier Certner 		    (intmax_t)phys_avail[i + 1]);
1805b7565d44SJeff Roberson 	if (phys_avail[i + 1] < phys_avail[i])
1806125ef4e0SOlivier Certner 		panic("phys_avail[%d]: start %#jx > end %#jx", i,
1807b7565d44SJeff Roberson 		    (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]);
1808b7565d44SJeff Roberson }
1809b7565d44SJeff Roberson 
1810b7565d44SJeff Roberson /*
1811b7565d44SJeff Roberson  * Return the index of an overlapping phys_avail entry or -1.
1812b7565d44SJeff Roberson  */
1813be3f5f29SJeff Roberson #ifdef NUMA
1814b7565d44SJeff Roberson static int
1815b7565d44SJeff Roberson vm_phys_avail_find(vm_paddr_t pa)
1816b7565d44SJeff Roberson {
1817b7565d44SJeff Roberson 	int i;
1818b7565d44SJeff Roberson 
1819b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1]; i += 2)
1820b7565d44SJeff Roberson 		if (phys_avail[i] <= pa && phys_avail[i + 1] > pa)
1821b7565d44SJeff Roberson 			return (i);
1822b7565d44SJeff Roberson 	return (-1);
1823b7565d44SJeff Roberson }
1824be3f5f29SJeff Roberson #endif
1825b7565d44SJeff Roberson 
1826b7565d44SJeff Roberson /*
1827b7565d44SJeff Roberson  * Return the index of the largest entry.
1828b7565d44SJeff Roberson  */
1829b7565d44SJeff Roberson int
1830b7565d44SJeff Roberson vm_phys_avail_largest(void)
1831b7565d44SJeff Roberson {
1832b7565d44SJeff Roberson 	vm_paddr_t sz, largesz;
1833b7565d44SJeff Roberson 	int largest;
1834b7565d44SJeff Roberson 	int i;
1835b7565d44SJeff Roberson 
1836b7565d44SJeff Roberson 	largest = 0;
1837b7565d44SJeff Roberson 	largesz = 0;
1838b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1]; i += 2) {
1839b7565d44SJeff Roberson 		sz = vm_phys_avail_size(i);
1840b7565d44SJeff Roberson 		if (sz > largesz) {
1841b7565d44SJeff Roberson 			largesz = sz;
1842b7565d44SJeff Roberson 			largest = i;
1843b7565d44SJeff Roberson 		}
1844b7565d44SJeff Roberson 	}
1845b7565d44SJeff Roberson 
1846b7565d44SJeff Roberson 	return (largest);
1847b7565d44SJeff Roberson }
1848b7565d44SJeff Roberson 
1849b7565d44SJeff Roberson vm_paddr_t
1850b7565d44SJeff Roberson vm_phys_avail_size(int i)
1851b7565d44SJeff Roberson {
1852b7565d44SJeff Roberson 
1853b7565d44SJeff Roberson 	return (phys_avail[i + 1] - phys_avail[i]);
1854b7565d44SJeff Roberson }
1855b7565d44SJeff Roberson 
1856b7565d44SJeff Roberson /*
1857e1499bffSOlivier Certner  * Split a chunk in phys_avail[] at the address 'pa'.
1858e1499bffSOlivier Certner  *
1859e1499bffSOlivier Certner  * 'pa' must be within a chunk (slots i and i + 1) or one of its boundaries.
1860e1499bffSOlivier Certner  * Returns zero on an actual split, in which case the two new chunks occupy
1861e1499bffSOlivier Certner  * slots i to i + 3; EJUSTRETURN if 'pa' was one of the boundaries (and no
1862e1499bffSOlivier Certner  * split actually occurred); or ENOSPC if there are not enough slots in
1863e1499bffSOlivier Certner  * phys_avail[] to represent the additional chunk caused by the split.
1864b7565d44SJeff Roberson  */
1865b7565d44SJeff Roberson static int
1866b7565d44SJeff Roberson vm_phys_avail_split(vm_paddr_t pa, int i)
1867b7565d44SJeff Roberson {
1868b7565d44SJeff Roberson 	int cnt;
1869b7565d44SJeff Roberson 
1870b7565d44SJeff Roberson 	vm_phys_avail_check(i);
1871e1499bffSOlivier Certner 	if (pa < phys_avail[i] || pa > phys_avail[i + 1])
1872e1499bffSOlivier Certner 		panic("%s: Address %#jx not in range at slot %d [%#jx;%#jx].",
1873e1499bffSOlivier Certner 		    __func__, (uintmax_t)pa, i,
1874e1499bffSOlivier Certner 		    (uintmax_t)phys_avail[i], (uintmax_t)phys_avail[i + 1]);
1875e1499bffSOlivier Certner 	if (pa == phys_avail[i] || pa == phys_avail[i + 1])
1876e1499bffSOlivier Certner 		return (EJUSTRETURN);
1877b7565d44SJeff Roberson 	cnt = vm_phys_avail_count();
1878b7565d44SJeff Roberson 	if (cnt >= PHYS_AVAIL_ENTRIES)
1879b7565d44SJeff Roberson 		return (ENOSPC);
1880b7565d44SJeff Roberson 	memmove(&phys_avail[i + 2], &phys_avail[i],
1881b7565d44SJeff Roberson 	    (cnt - i) * sizeof(phys_avail[0]));
1882b7565d44SJeff Roberson 	phys_avail[i + 1] = pa;
1883b7565d44SJeff Roberson 	phys_avail[i + 2] = pa;
1884b7565d44SJeff Roberson 	vm_phys_avail_check(i);
1885b7565d44SJeff Roberson 	vm_phys_avail_check(i+2);
1886b7565d44SJeff Roberson 
1887b7565d44SJeff Roberson 	return (0);
1888b7565d44SJeff Roberson }
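
/*
 * Illustrative example (hypothetical addresses): with phys_avail[i] =
 * 0x1000 and phys_avail[i + 1] = 0x9000, splitting at pa = 0x4000 shifts
 * the later entries up by two slots and leaves [0x1000, 0x4000) in slots
 * i and i + 1 and [0x4000, 0x9000) in slots i + 2 and i + 3.  Splitting
 * at 0x1000 or 0x9000 instead returns EJUSTRETURN and changes nothing.
 */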
1889b7565d44SJeff Roberson 
189031991a5aSMitchell Horne /*
189131991a5aSMitchell Horne  * Check if a given physical address can be included as part of a crash dump.
189231991a5aSMitchell Horne  */
189331991a5aSMitchell Horne bool
189431991a5aSMitchell Horne vm_phys_is_dumpable(vm_paddr_t pa)
189531991a5aSMitchell Horne {
189631991a5aSMitchell Horne 	vm_page_t m;
189731991a5aSMitchell Horne 	int i;
189831991a5aSMitchell Horne 
189931991a5aSMitchell Horne 	if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
190031991a5aSMitchell Horne 		return ((m->flags & PG_NODUMP) == 0);
190131991a5aSMitchell Horne 
190231991a5aSMitchell Horne 	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
190331991a5aSMitchell Horne 		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
190431991a5aSMitchell Horne 			return (true);
190531991a5aSMitchell Horne 	}
190631991a5aSMitchell Horne 	return (false);
190731991a5aSMitchell Horne }
190831991a5aSMitchell Horne 
190981302f1dSMark Johnston void
191081302f1dSMark Johnston vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
191181302f1dSMark Johnston {
191281302f1dSMark Johnston 	struct vm_phys_seg *seg;
191381302f1dSMark Johnston 
191481302f1dSMark Johnston 	if (vm_phys_early_nsegs == -1)
191581302f1dSMark Johnston 		panic("%s: called after initialization", __func__);
191681302f1dSMark Johnston 	if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
191781302f1dSMark Johnston 		panic("%s: ran out of early segments", __func__);
191881302f1dSMark Johnston 
191981302f1dSMark Johnston 	seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
192081302f1dSMark Johnston 	seg->start = start;
192181302f1dSMark Johnston 	seg->end = end;
192281302f1dSMark Johnston }
192381302f1dSMark Johnston 
1924b7565d44SJeff Roberson /*
1925b7565d44SJeff Roberson  * This routine allocates NUMA node specific memory before the page
1926b7565d44SJeff Roberson  * allocator is bootstrapped.
1927b7565d44SJeff Roberson  */
1928b7565d44SJeff Roberson vm_paddr_t
1929b7565d44SJeff Roberson vm_phys_early_alloc(int domain, size_t alloc_size)
1930b7565d44SJeff Roberson {
19312e7838aeSJohn Baldwin #ifdef NUMA
19322e7838aeSJohn Baldwin 	int mem_index;
19332e7838aeSJohn Baldwin #endif
19342e7838aeSJohn Baldwin 	int i, biggestone;
1935b7565d44SJeff Roberson 	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
1936b7565d44SJeff Roberson 
193781302f1dSMark Johnston 	KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
193881302f1dSMark Johnston 	    ("%s: invalid domain index %d", __func__, domain));
1939b7565d44SJeff Roberson 
1940b7565d44SJeff Roberson 	/*
1941b7565d44SJeff Roberson 	 * Search the mem_affinity array for the biggest address
1942b7565d44SJeff Roberson 	 * range in the desired domain.  This is used to constrain
1943b7565d44SJeff Roberson 	 * the phys_avail selection below.
1944b7565d44SJeff Roberson 	 */
1945b7565d44SJeff Roberson 	biggestsize = 0;
1946b7565d44SJeff Roberson 	mem_start = 0;
1947b7565d44SJeff Roberson 	mem_end = -1;
1948b7565d44SJeff Roberson #ifdef NUMA
19492e7838aeSJohn Baldwin 	mem_index = 0;
1950b7565d44SJeff Roberson 	if (mem_affinity != NULL) {
1951b7565d44SJeff Roberson 		for (i = 0;; i++) {
1952b7565d44SJeff Roberson 			size = mem_affinity[i].end - mem_affinity[i].start;
1953b7565d44SJeff Roberson 			if (size == 0)
1954b7565d44SJeff Roberson 				break;
195581302f1dSMark Johnston 			if (domain != -1 && mem_affinity[i].domain != domain)
1956b7565d44SJeff Roberson 				continue;
1957b7565d44SJeff Roberson 			if (size > biggestsize) {
1958b7565d44SJeff Roberson 				mem_index = i;
1959b7565d44SJeff Roberson 				biggestsize = size;
1960b7565d44SJeff Roberson 			}
1961b7565d44SJeff Roberson 		}
1962b7565d44SJeff Roberson 		mem_start = mem_affinity[mem_index].start;
1963b7565d44SJeff Roberson 		mem_end = mem_affinity[mem_index].end;
1964b7565d44SJeff Roberson 	}
1965b7565d44SJeff Roberson #endif
1966b7565d44SJeff Roberson 
1967b7565d44SJeff Roberson 	/*
1968b7565d44SJeff Roberson 	 * Now find the biggest physical segment within the desired
1969b7565d44SJeff Roberson 	 * NUMA domain.
1970b7565d44SJeff Roberson 	 */
1971b7565d44SJeff Roberson 	biggestsize = 0;
1972b7565d44SJeff Roberson 	biggestone = 0;
1973b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1974b7565d44SJeff Roberson 		/* skip regions that are out of range */
1975b7565d44SJeff Roberson 		if (phys_avail[i+1] - alloc_size < mem_start ||
1976b7565d44SJeff Roberson 		    phys_avail[i+1] > mem_end)
1977b7565d44SJeff Roberson 			continue;
1978b7565d44SJeff Roberson 		size = vm_phys_avail_size(i);
1979b7565d44SJeff Roberson 		if (size > biggestsize) {
1980b7565d44SJeff Roberson 			biggestone = i;
1981b7565d44SJeff Roberson 			biggestsize = size;
1982b7565d44SJeff Roberson 		}
1983b7565d44SJeff Roberson 	}
1984b7565d44SJeff Roberson 	alloc_size = round_page(alloc_size);
1985b7565d44SJeff Roberson 
1986b7565d44SJeff Roberson 	/*
1987b7565d44SJeff Roberson 	 * Grab single pages from the front to reduce fragmentation.
1988b7565d44SJeff Roberson 	 */
1989b7565d44SJeff Roberson 	if (alloc_size == PAGE_SIZE) {
1990b7565d44SJeff Roberson 		pa = phys_avail[biggestone];
1991b7565d44SJeff Roberson 		phys_avail[biggestone] += PAGE_SIZE;
1992b7565d44SJeff Roberson 		vm_phys_avail_check(biggestone);
1993b7565d44SJeff Roberson 		return (pa);
1994b7565d44SJeff Roberson 	}
1995b7565d44SJeff Roberson 
1996b7565d44SJeff Roberson 	/*
1997b7565d44SJeff Roberson 	 * Naturally align large allocations.
1998b7565d44SJeff Roberson 	 */
1999b7565d44SJeff Roberson 	align = phys_avail[biggestone + 1] & (alloc_size - 1);
2000b7565d44SJeff Roberson 	if (alloc_size + align > biggestsize)
2001b7565d44SJeff Roberson 		panic("cannot find a large enough size\n");
2002b7565d44SJeff Roberson 	if (align != 0 &&
2003b7565d44SJeff Roberson 	    vm_phys_avail_split(phys_avail[biggestone + 1] - align,
2004b7565d44SJeff Roberson 	    biggestone) != 0)
2005b7565d44SJeff Roberson 		/* Wasting memory. */
2006b7565d44SJeff Roberson 		phys_avail[biggestone + 1] -= align;
2007b7565d44SJeff Roberson 
2008b7565d44SJeff Roberson 	phys_avail[biggestone + 1] -= alloc_size;
2009b7565d44SJeff Roberson 	vm_phys_avail_check(biggestone);
2010b7565d44SJeff Roberson 	pa = phys_avail[biggestone + 1];
2011b7565d44SJeff Roberson 	return (pa);
2012b7565d44SJeff Roberson }
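
/*
 * Illustrative alignment example (hypothetical addresses): with
 * phys_avail[biggestone + 1] = 0x12345000 and alloc_size = 0x4000,
 * align = 0x12345000 & 0x3fff = 0x1000.  The chunk's end is first moved
 * down by that amount (splitting off the tail as its own chunk when a
 * slot is available, otherwise wasting it), and the allocation is then
 * carved from the new end, returning pa = 0x12340000, which is naturally
 * aligned to the 16 KB allocation size.
 */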
2013b7565d44SJeff Roberson 
2014b7565d44SJeff Roberson void
2015b7565d44SJeff Roberson vm_phys_early_startup(void)
2016b7565d44SJeff Roberson {
201781302f1dSMark Johnston 	struct vm_phys_seg *seg;
2018b7565d44SJeff Roberson 	int i;
2019b7565d44SJeff Roberson 
2020*32e77bcdSOlivier Certner 	if (phys_avail[1] == 0)
2021*32e77bcdSOlivier Certner 		panic("phys_avail[] is empty");
2022*32e77bcdSOlivier Certner 
2023b7565d44SJeff Roberson 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2024b7565d44SJeff Roberson 		phys_avail[i] = round_page(phys_avail[i]);
2025b7565d44SJeff Roberson 		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
2026b7565d44SJeff Roberson 	}
2027b7565d44SJeff Roberson 
202881302f1dSMark Johnston 	for (i = 0; i < vm_phys_early_nsegs; i++) {
202981302f1dSMark Johnston 		seg = &vm_phys_early_segs[i];
203081302f1dSMark Johnston 		vm_phys_add_seg(seg->start, seg->end);
203181302f1dSMark Johnston 	}
203281302f1dSMark Johnston 	vm_phys_early_nsegs = -1;
203381302f1dSMark Johnston 
2034b7565d44SJeff Roberson #ifdef NUMA
2035b7565d44SJeff Roberson 	/* Force phys_avail to be split by domain. */
2036b7565d44SJeff Roberson 	if (mem_affinity != NULL) {
2037b7565d44SJeff Roberson 		int idx;
2038b7565d44SJeff Roberson 
2039b7565d44SJeff Roberson 		for (i = 0; mem_affinity[i].end != 0; i++) {
2040b7565d44SJeff Roberson 			idx = vm_phys_avail_find(mem_affinity[i].start);
2041e1499bffSOlivier Certner 			if (idx != -1)
2042b7565d44SJeff Roberson 				vm_phys_avail_split(mem_affinity[i].start, idx);
2043b7565d44SJeff Roberson 			idx = vm_phys_avail_find(mem_affinity[i].end);
2044e1499bffSOlivier Certner 			if (idx != -1)
2045b7565d44SJeff Roberson 				vm_phys_avail_split(mem_affinity[i].end, idx);
2046b7565d44SJeff Roberson 		}
2047b7565d44SJeff Roberson 	}
2048b7565d44SJeff Roberson #endif
2049b7565d44SJeff Roberson }
2050b7565d44SJeff Roberson 
205111752d88SAlan Cox #ifdef DDB
205211752d88SAlan Cox /*
205311752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
205411752d88SAlan Cox  */
2055c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(freepages, db_show_freepages, DB_CMD_MEMSAFE)
205611752d88SAlan Cox {
205711752d88SAlan Cox 	struct vm_freelist *fl;
20587e226537SAttilio Rao 	int flind, oind, pind, dom;
205911752d88SAlan Cox 
20607e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
20617e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
206211752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
206311752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
206411752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
206511752d88SAlan Cox 			    "\n              ", flind);
206611752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
206711752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
206811752d88SAlan Cox 			db_printf("\n--            ");
206911752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
207011752d88SAlan Cox 				db_printf("-- --      ");
207111752d88SAlan Cox 			db_printf("--\n");
207211752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
207311752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
207411752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
207511752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
20767e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
207711752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
207811752d88SAlan Cox 				}
207911752d88SAlan Cox 				db_printf("\n");
208011752d88SAlan Cox 			}
208111752d88SAlan Cox 			db_printf("\n");
208211752d88SAlan Cox 		}
20837e226537SAttilio Rao 		db_printf("\n");
20847e226537SAttilio Rao 	}
208511752d88SAlan Cox }
208611752d88SAlan Cox #endif
2087