/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/tslog.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");
_Static_assert(sizeof(long long) >= sizeof(vm_paddr_t),
    "vm_paddr_t too big for ffsll, flsll.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;

static int numa_disabled;
static SYSCTL_NODE(_vm, OID_AUTO, numa, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NUMA options");
SYSCTL_INT(_vm_numa, OID_AUTO, disabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &numa_disabled, 0, "NUMA-awareness in the allocators is disabled");
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;
static struct vm_phys_seg vm_phys_early_segs[8];
static int vm_phys_early_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(&vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * These "avail lists" are globals used to communicate boot-time physical
 * memory layout to other parts of the kernel.  Each physically contiguous
 * region of memory is defined by a start address at an even index and an
 * end address at the following odd index.  Each list is terminated by a
 * pair of zero entries.
 *
 * dump_avail tells the dump code what regions to include in a crash dump, and
 * phys_avail is all of the remaining physical memory that is available for
 * the vm system.
 *
 * Initially dump_avail and phys_avail are identical.  Boot time memory
 * allocations remove extents from phys_avail that may still be included
 * in dumps.
 */
vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];
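
/*
 * Illustrative sketch (hypothetical addresses, not taken from any real
 * machine): a system with usable RAM at [0, 640KB) and [1MB, 4GB), where
 * early boot-time allocations consumed the first 16MB above 1MB, might
 * end up with:
 *
 *	dump_avail[] = { 0x0, 0xa0000, 0x100000, 0x100000000, 0, 0 }
 *	phys_avail[] = { 0x0, 0xa0000, 0x1100000, 0x100000000, 0, 0 }
 *
 * The values only illustrate the even/odd start/end pairing and the
 * terminating pair of zero entries described above.
 */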

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
static int __read_mostly vm_default_freepool;

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_free, "A",
    "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_segs, "A",
    "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_locality, "A",
    "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int pool, int tail);

static bool __diagused
vm_phys_pool_valid(int pool)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (pool == VM_FREEPOOL_LAZYINIT)
		return (false);
#endif
	return (pool >= 0 && pool < VM_NFREEPOOL);
}

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}
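
/*
 * Note on the comparator above: a lookup by address, as performed by
 * vm_phys_fictitious_to_vm_page() later in this file, passes a degenerate
 * "range" whose start is the physical address of interest and whose end is
 * zero.  The comparator therefore falls through to
 * vm_phys_fictitious_in_range() and locates the registered segment that
 * contains the address, while insertions (both endpoints nonzero) use the
 * overlap checks below.
 */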

int
vm_phys_domain_match(int prefer __numa_used, vm_paddr_t low __numa_used,
    vm_paddr_t high __numa_used)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}
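
/*
 * Illustrative example (hypothetical domain layout): if mem_affinity
 * describes domain 0 as [0, 4GB) and domain 1 as [4GB, 8GB), a request
 * with low = 5GB, high = 6GB and prefer = -1 sets only bit 1 in the mask
 * and returns domain 1.  The same request with prefer = 1 also returns 1,
 * while prefer = 0 is ignored because domain 0 does not overlap the range.
 */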

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f __numa_used, int t __numa_used)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{

	m->order = order;
	m->pool = pool;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	if (!(0 <= domain && domain < vm_ndomains))
		panic("%s: Invalid domain %d ('vm_ndomains' is %d)",
		    __func__, domain, vm_ndomains);
	if (vm_phys_nsegs >= VM_PHYSSEG_MAX)
		panic("Not enough storage for physical segments, "
		    "increase VM_PHYSSEG_MAX");

	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && seg[-1].start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
	if (seg != vm_phys_segs && seg[-1].end > start)
		panic("Overlapping physical segments: Current [%#jx,%#jx) "
		    "at index %zu, previous [%#jx,%#jx)",
		    (uintmax_t)start, (uintmax_t)end, seg - vm_phys_segs,
		    (uintmax_t)seg[-1].start, (uintmax_t)seg[-1].end);
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}
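
/*
 * Illustrative example (hypothetical affinity table): if mem_affinity maps
 * [0, 2GB) to domain 0 and [2GB, 8GB) to domain 1, then
 * vm_phys_create_seg(1GB, 3GB) creates two segments, [1GB, 2GB) in domain 0
 * and [2GB, 3GB) in domain 1.
 */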

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	if ((start & PAGE_MASK) != 0)
		panic("%s: start (%jx) is not page aligned", __func__,
		    (uintmax_t)start);
	if ((end & PAGE_MASK) != 0)
		panic("%s: end (%jx) is not page aligned", __func__,
		    (uintmax_t)end);
	if (start > end)
		panic("%s: start (%jx) > end (%jx)!", __func__,
		    (uintmax_t)start, (uintmax_t)end);

	if (start == end)
		return;

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef	VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef	VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
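
/*
 * Illustrative example: on a configuration that defines both a low-memory
 * boundary (VM_LOWMEM_BOUNDARY, e.g. 16MB on some platforms) and the 4GB
 * VM_DMA32_BOUNDARY, vm_phys_add_seg(0, 8GB) is split into three segments,
 * [0, 16MB), [16MB, 4GB), and [4GB, 8GB), so that no segment spans a free
 * list boundary.  The boundary values are platform-dependent.
 */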

/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
#if defined(VM_DMA32_NPAGES_THRESHOLD) || defined(VM_PHYSSEG_SPARSE)
	u_long npages;
#endif
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
#ifdef	VM_DMA32_NPAGES_THRESHOLD
	npages = 0;
#endif
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef	VM_FREELIST_DMA32
		if (
#ifdef	VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
#ifdef	VM_DMA32_NPAGES_THRESHOLD
			npages += atop(seg->end - seg->start);
#endif
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;
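
	/*
	 * Worked example of the mapping above, for a hypothetical machine
	 * with VM_NFREELIST == 2 (DEFAULT == 0, LOWMEM == 1) and memory on
	 * both sides of the low-memory boundary: the first loop leaves
	 * vm_freelist_to_flind[] = { 1, 1 }, the running-total pass turns it
	 * into { 1, 2 } and sets vm_nfreelists = 2, and the decrement yields
	 * { 0, 1 }, i.e. DEFAULT -> flind 0 and LOWMEM -> flind 1.  Had all
	 * memory been below the boundary, the array would have gone
	 * { 0, 1 } -> { 0, 1 } -> { -1, 0 }; the -1 is what the "flind < 0"
	 * checks below treat as an absent free list.
	 */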

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef	VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

#ifdef VM_FREEPOOL_LAZYINIT
	vm_default_freepool = VM_FREEPOOL_LAZYINIT;
#else
	vm_default_freepool = VM_FREEPOOL_DEFAULT;
#endif

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains __numa_used,
    struct mem_affinity *affinity __numa_used, int *locality __numa_used)
{
#ifdef NUMA
	int i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	TUNABLE_INT_FETCH("vm.numa.disabled", &numa_disabled);
	if (numa_disabled)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int pool, int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, pool, tail);
	}
}
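
/*
 * Worked example for the splitting above (hypothetical values): if a caller
 * needs an order-2 (4-page) chunk and the smallest available block is order
 * 4 (16 pages), the loop frees the upper buddy of order 3 (pages 8-15) and
 * then the buddy of order 2 (pages 4-7), leaving pages 0-3 for the caller.
 */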

static void
vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{
	KASSERT(order >= 0 && order < VM_NFREEORDER,
	    ("%s: invalid order %d", __func__, order));

	vm_freelist_add(fl, m, order, pool, tail);
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(pool == VM_FREEPOOL_LAZYINIT)) {
		vm_page_t m_next;
		vm_paddr_t pa;
		int npages;

		npages = 1 << order;
		m_next = m + npages;
		pa = m->phys_addr + ptoa(npages);
		if (pa < vm_phys_segs[m->segind].end) {
			vm_page_init_page(m_next, pa, m->segind,
			    VM_FREEPOOL_LAZYINIT);
		}
	}
#endif
}

/*
 * Add the physical pages [m, m + npages) at the beginning of a power-of-two
 * aligned and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-term
 * fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    (VM_PAGE_TO_PHYS(m) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("%s: page %p and npages %u are misaligned",
	    __func__, m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("%s: page %p has unexpected order %d",
		    __func__, m, m->order));
		order = ilog2(npages);
		KASSERT(order < VM_NFREEORDER,
		    ("%s: order %d is out of range", __func__, order));
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
}
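
/*
 * Worked example (hypothetical): with npages == 6, the loop above adds the
 * largest power-of-two chunk first: an order-2 chunk (pages 0-3) followed by
 * an order-1 chunk (pages 4-5).  Chunks are taken from the front of the
 * range, which is why the alignment assertion applies to m itself.
 */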

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-term
 * fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * If npages is zero, this function does nothing and ignores the physical page
 * parameter m.  Otherwise, the physical page m's buddy must not be free.
 */
static vm_page_t
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    ((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
	return (m);
}
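
/*
 * Worked example (hypothetical): with npages == 6, the loop above adds the
 * smallest power-of-two chunk first: an order-1 chunk (pages 0-1) followed
 * by an order-2 chunk (pages 2-5).  Because the chunks grow toward the end
 * of the range, the alignment assertion applies to m + npages rather than to
 * m, mirroring vm_phys_enq_beg().
 */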

/*
 * Complete the initialization of a contiguous, power of two-sized set of
 * physical pages.
 *
 * If the pages currently belong to the lazy init pool, then the corresponding
 * page structures must be initialized.  In this case it is assumed that the
 * first page in the run has already been initialized.
 */
static void
vm_phys_finish_init(vm_page_t m, int order)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
		vm_paddr_t pa;
		int segind;

		TSENTER();
		pa = m->phys_addr + PAGE_SIZE;
		segind = m->segind;
		for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
		    m_tmp++, pa += PAGE_SIZE)
			vm_page_init_page(m_tmp, pa, segind, VM_NFREEPOOL);
		TSEXIT();
	}
#endif
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().  The allocated pages have no
 * valid pool field set.
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(vm_phys_pool_valid(pool),
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = i + (1 << oind);
				end = imin(npages, avail);
				while (i < end)
					ma[i++] = m++;
				if (i == npages) {
					/*
					 * Return excess pages to fl.  Its order
					 * [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - i, fl,
					    pool, 1);
					return (npages);
				}
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = vm_default_freepool; pind < VM_NFREEPOOL;
			    pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_finish_init(m, oind);
					avail = i + (1 << oind);
					end = imin(npages, avail);
					while (i < end)
						ma[i++] = m++;
					if (i == npages) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail - i,
						    fl, pool, 1);
						return (npages);
					}
				}
			}
		}
	}
	return (i);
}
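
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 *
 *	vm_page_t ma[4];
 *	int got;
 *
 *	vm_domain_free_lock(VM_DOMAIN(0));
 *	got = vm_phys_alloc_npages(0, VM_FREEPOOL_DEFAULT, 4, ma);
 *	vm_domain_free_unlock(VM_DOMAIN(0));
 *
 * On success got == 4; a smaller value means the domain ran out of free
 * pages and only the first 'got' entries of ma[] are valid.
 */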
92589ea39a7SAlan Cox
92689ea39a7SAlan Cox /*
927d866a563SAlan Cox * Allocate a contiguous, power of two-sized set of physical pages from the
928d866a563SAlan Cox * specified free list. The free list must be specified using one of the
929e3537f92SDoug Moore * manifest constants VM_FREELIST_*.
930d866a563SAlan Cox *
931d866a563SAlan Cox * The free page queues must be locked.
93249ca10d4SJayachandran C. */
9336aede562SDoug Moore static vm_page_t
vm_phys_alloc_freelist_pages(int domain,int freelist,int pool,int order)9340db2102aSMichael Zhilin vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
93549ca10d4SJayachandran C. {
936ef435ae7SJeff Roberson struct vm_freelist *alt, *fl;
93711752d88SAlan Cox vm_page_t m;
9380db2102aSMichael Zhilin int oind, pind, flind;
93911752d88SAlan Cox
940ef435ae7SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains,
941ef435ae7SJeff Roberson ("vm_phys_alloc_freelist_pages: domain %d is out of range",
942ef435ae7SJeff Roberson domain));
9430db2102aSMichael Zhilin KASSERT(freelist < VM_NFREELIST,
944d866a563SAlan Cox ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
9455be93778SAndrew Turner freelist));
946b16b4c22SMark Johnston KASSERT(vm_phys_pool_valid(pool),
94749ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
94811752d88SAlan Cox KASSERT(order < VM_NFREEORDER,
94949ca10d4SJayachandran C. ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
9506520495aSAdrian Chadd
9510db2102aSMichael Zhilin flind = vm_freelist_to_flind[freelist];
9520db2102aSMichael Zhilin /* Check if freelist is present */
9530db2102aSMichael Zhilin if (flind < 0)
9540db2102aSMichael Zhilin return (NULL);
9550db2102aSMichael Zhilin
956e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(domain));
9577e226537SAttilio Rao fl = &vm_phys_free_queues[domain][flind][pool][0];
95811752d88SAlan Cox for (oind = order; oind < VM_NFREEORDER; oind++) {
95911752d88SAlan Cox m = TAILQ_FIRST(&fl[oind].pl);
96011752d88SAlan Cox if (m != NULL) {
9617e226537SAttilio Rao vm_freelist_rem(fl, m, oind);
962370a338aSAlan Cox /* The order [order, oind) queues are empty. */
9630078df5fSDoug Moore vm_phys_split_pages(m, oind, fl, order, pool, 1);
96411752d88SAlan Cox return (m);
96511752d88SAlan Cox }
96611752d88SAlan Cox }
96711752d88SAlan Cox
96811752d88SAlan Cox /*
96911752d88SAlan Cox * The given pool was empty. Find the largest
97011752d88SAlan Cox * contiguous, power-of-two-sized set of pages in any
97111752d88SAlan Cox * pool. Transfer these pages to the given pool, and
97211752d88SAlan Cox * use them to satisfy the allocation.
97311752d88SAlan Cox */
97411752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
975b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
9767e226537SAttilio Rao alt = &vm_phys_free_queues[domain][flind][pind][0];
97711752d88SAlan Cox m = TAILQ_FIRST(&alt[oind].pl);
97811752d88SAlan Cox if (m != NULL) {
9797e226537SAttilio Rao vm_freelist_rem(alt, m, oind);
9800078df5fSDoug Moore vm_phys_finish_init(m, oind);
981370a338aSAlan Cox /* The order [order, oind) queues are empty. */
9820078df5fSDoug Moore vm_phys_split_pages(m, oind, fl, order, pool, 1);
98311752d88SAlan Cox return (m);
98411752d88SAlan Cox }
98511752d88SAlan Cox }
98611752d88SAlan Cox }
98711752d88SAlan Cox return (NULL);
98811752d88SAlan Cox }
98911752d88SAlan Cox
99011752d88SAlan Cox /*
9916aede562SDoug Moore * Allocate a contiguous, power of two-sized set of physical pages
9926aede562SDoug Moore * from the free lists.
9936aede562SDoug Moore *
9946aede562SDoug Moore * The free page queues must be locked.
9956aede562SDoug Moore */
9966aede562SDoug Moore vm_page_t
vm_phys_alloc_pages(int domain,int pool,int order)9976aede562SDoug Moore vm_phys_alloc_pages(int domain, int pool, int order)
9986aede562SDoug Moore {
9996aede562SDoug Moore vm_page_t m;
10006aede562SDoug Moore int freelist;
10016aede562SDoug Moore
10026aede562SDoug Moore for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
10036aede562SDoug Moore m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
10046aede562SDoug Moore if (m != NULL)
10056aede562SDoug Moore return (m);
10066aede562SDoug Moore }
10076aede562SDoug Moore return (NULL);
10086aede562SDoug Moore }
10096aede562SDoug Moore
10106aede562SDoug Moore /*
101169cbb187SMark Johnston * Find the vm_page corresponding to the given physical address, which must lie
101269cbb187SMark Johnston * within the given physical memory segment.
101369cbb187SMark Johnston */
101469cbb187SMark Johnston vm_page_t
vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg * seg,vm_paddr_t pa)101569cbb187SMark Johnston vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg *seg, vm_paddr_t pa)
101669cbb187SMark Johnston {
101769cbb187SMark Johnston KASSERT(pa >= seg->start && pa < seg->end,
101869cbb187SMark Johnston ("%s: pa %#jx is out of range", __func__, (uintmax_t)pa));
101969cbb187SMark Johnston
102069cbb187SMark Johnston return (&seg->first_page[atop(pa - seg->start)]);
102169cbb187SMark Johnston }
102269cbb187SMark Johnston
102369cbb187SMark Johnston /*
102411752d88SAlan Cox * Find the vm_page corresponding to the given physical address.
102511752d88SAlan Cox */
102611752d88SAlan Cox vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)102711752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
102811752d88SAlan Cox {
102911752d88SAlan Cox struct vm_phys_seg *seg;
103011752d88SAlan Cox
10319e817428SDoug Moore if ((seg = vm_phys_paddr_to_seg(pa)) != NULL)
103269cbb187SMark Johnston return (vm_phys_seg_paddr_to_vm_page(seg, pa));
1033f06a3a36SAndrew Thompson return (NULL);
103411752d88SAlan Cox }
103511752d88SAlan Cox
1036b6de32bdSKonstantin Belousov vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)1037b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
1038b6de32bdSKonstantin Belousov {
103938d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg tmp, *seg;
1040b6de32bdSKonstantin Belousov vm_page_t m;
1041b6de32bdSKonstantin Belousov
1042b6de32bdSKonstantin Belousov m = NULL;
104338d6b2dcSRoger Pau Monné tmp.start = pa;
104438d6b2dcSRoger Pau Monné tmp.end = 0;
104538d6b2dcSRoger Pau Monné
104638d6b2dcSRoger Pau Monné rw_rlock(&vm_phys_fictitious_reg_lock);
104738d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
104838d6b2dcSRoger Pau Monné rw_runlock(&vm_phys_fictitious_reg_lock);
104938d6b2dcSRoger Pau Monné if (seg == NULL)
105038d6b2dcSRoger Pau Monné return (NULL);
105138d6b2dcSRoger Pau Monné
1052b6de32bdSKonstantin Belousov m = &seg->first_page[atop(pa - seg->start)];
105338d6b2dcSRoger Pau Monné KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
105438d6b2dcSRoger Pau Monné
1055b6de32bdSKonstantin Belousov return (m);
1056b6de32bdSKonstantin Belousov }
1057b6de32bdSKonstantin Belousov
10585ebe728dSRoger Pau Monné static inline void
vm_phys_fictitious_init_range(vm_page_t range,vm_paddr_t start,long page_count,vm_memattr_t memattr)10595ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
10605ebe728dSRoger Pau Monné long page_count, vm_memattr_t memattr)
10615ebe728dSRoger Pau Monné {
10625ebe728dSRoger Pau Monné long i;
10635ebe728dSRoger Pau Monné
1064f93f7cf1SMark Johnston bzero(range, page_count * sizeof(*range));
10655ebe728dSRoger Pau Monné for (i = 0; i < page_count; i++) {
10665ebe728dSRoger Pau Monné vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
10675ebe728dSRoger Pau Monné range[i].oflags &= ~VPO_UNMANAGED;
10685ebe728dSRoger Pau Monné range[i].busy_lock = VPB_UNBUSIED;
10695ebe728dSRoger Pau Monné }
10705ebe728dSRoger Pau Monné }
10715ebe728dSRoger Pau Monné
1072b6de32bdSKonstantin Belousov int
vm_phys_fictitious_reg_range(vm_paddr_t start,vm_paddr_t end,vm_memattr_t memattr)1073b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
1074b6de32bdSKonstantin Belousov vm_memattr_t memattr)
1075b6de32bdSKonstantin Belousov {
1076b6de32bdSKonstantin Belousov struct vm_phys_fictitious_seg *seg;
1077b6de32bdSKonstantin Belousov vm_page_t fp;
10785ebe728dSRoger Pau Monné long page_count;
1079b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
10805ebe728dSRoger Pau Monné long pi, pe;
10815ebe728dSRoger Pau Monné long dpage_count;
1082b6de32bdSKonstantin Belousov #endif
1083b6de32bdSKonstantin Belousov
10845ebe728dSRoger Pau Monné KASSERT(start < end,
10855ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)",
10865ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end));
10875ebe728dSRoger Pau Monné
1088b6de32bdSKonstantin Belousov page_count = (end - start) / PAGE_SIZE;
1089b6de32bdSKonstantin Belousov
1090b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
1091b6de32bdSKonstantin Belousov pi = atop(start);
10925ebe728dSRoger Pau Monné pe = atop(end);
10935ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
1094b6de32bdSKonstantin Belousov fp = &vm_page_array[pi - first_page];
10955ebe728dSRoger Pau Monné if ((pe - first_page) > vm_page_array_size) {
10965ebe728dSRoger Pau Monné /*
10975ebe728dSRoger Pau Monné * We have a segment that starts inside
10985ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it.
10995ebe728dSRoger Pau Monné *
11005ebe728dSRoger Pau Monné * Use vm_page_array pages for those that are
11015ebe728dSRoger Pau Monné * inside of the vm_page_array range, and
11025ebe728dSRoger Pau Monné * allocate the remaining ones.
11035ebe728dSRoger Pau Monné */
11045ebe728dSRoger Pau Monné dpage_count = vm_page_array_size - (pi - first_page);
11055ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, dpage_count,
11065ebe728dSRoger Pau Monné memattr);
11075ebe728dSRoger Pau Monné page_count -= dpage_count;
11085ebe728dSRoger Pau Monné start += ptoa(dpage_count);
11095ebe728dSRoger Pau Monné goto alloc;
11105ebe728dSRoger Pau Monné }
11115ebe728dSRoger Pau Monné /*
11125ebe728dSRoger Pau Monné * We can allocate the full range from vm_page_array,
11135ebe728dSRoger Pau Monné * so there's no need to register the range in the tree.
11145ebe728dSRoger Pau Monné */
11155ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr);
11165ebe728dSRoger Pau Monné return (0);
11175ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
11185ebe728dSRoger Pau Monné /*
11195ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array,
11205ebe728dSRoger Pau Monné * but starts outside of it.
11215ebe728dSRoger Pau Monné */
11225ebe728dSRoger Pau Monné fp = &vm_page_array[0];
11235ebe728dSRoger Pau Monné dpage_count = pe - first_page;
11245ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
11255ebe728dSRoger Pau Monné memattr);
11265ebe728dSRoger Pau Monné end -= ptoa(dpage_count);
11275ebe728dSRoger Pau Monné page_count -= dpage_count;
11285ebe728dSRoger Pau Monné goto alloc;
11295ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
11305ebe728dSRoger Pau Monné /*
11315ebe728dSRoger Pau Monné  * Trying to register a fictitious range that extends before
11325ebe728dSRoger Pau Monné * and after vm_page_array.
11335ebe728dSRoger Pau Monné */
11345ebe728dSRoger Pau Monné return (EINVAL);
11355ebe728dSRoger Pau Monné } else {
11365ebe728dSRoger Pau Monné alloc:
1137b6de32bdSKonstantin Belousov #endif
1138b6de32bdSKonstantin Belousov fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
1139f93f7cf1SMark Johnston M_WAITOK);
11405ebe728dSRoger Pau Monné #ifdef VM_PHYSSEG_DENSE
1141b6de32bdSKonstantin Belousov }
11425ebe728dSRoger Pau Monné #endif
11435ebe728dSRoger Pau Monné vm_phys_fictitious_init_range(fp, start, page_count, memattr);
114438d6b2dcSRoger Pau Monné
114538d6b2dcSRoger Pau Monné seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
1146b6de32bdSKonstantin Belousov seg->start = start;
1147b6de32bdSKonstantin Belousov seg->end = end;
1148b6de32bdSKonstantin Belousov seg->first_page = fp;
114938d6b2dcSRoger Pau Monné
115038d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock);
115138d6b2dcSRoger Pau Monné RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
115238d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock);
115338d6b2dcSRoger Pau Monné
1154b6de32bdSKonstantin Belousov return (0);
1155b6de32bdSKonstantin Belousov }
1156b6de32bdSKonstantin Belousov
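/*
 * Illustrative sketch (editor's addition, not part of the original code):
 * a typical consumer of vm_phys_fictitious_reg_range() registers a physical
 * range backed by device memory, translates addresses within it to
 * fictitious vm_page structures, and unregisters the range on teardown.
 * The example addresses and the use of VM_MEMATTR_DEFAULT are assumptions
 * chosen for illustration only.
 *
 *	error = vm_phys_fictitious_reg_range(0x10000000, 0x10100000,
 *	    VM_MEMATTR_DEFAULT);
 *	if (error != 0)
 *		return (error);
 *	m = vm_phys_fictitious_to_vm_page(0x10004000);
 *	...
 *	vm_phys_fictitious_unreg_range(0x10000000, 0x10100000);
 */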
1157b6de32bdSKonstantin Belousov void
1158b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
1159b6de32bdSKonstantin Belousov {
116038d6b2dcSRoger Pau Monné struct vm_phys_fictitious_seg *seg, tmp;
1161b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
11625ebe728dSRoger Pau Monné long pi, pe;
1163b6de32bdSKonstantin Belousov #endif
1164b6de32bdSKonstantin Belousov
11655ebe728dSRoger Pau Monné KASSERT(start < end,
11665ebe728dSRoger Pau Monné ("Start of segment isn't less than end (start: %jx end: %jx)",
11675ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end));
11685ebe728dSRoger Pau Monné
1169b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
1170b6de32bdSKonstantin Belousov pi = atop(start);
11715ebe728dSRoger Pau Monné pe = atop(end);
11725ebe728dSRoger Pau Monné if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
11735ebe728dSRoger Pau Monné if ((pe - first_page) <= vm_page_array_size) {
11745ebe728dSRoger Pau Monné /*
11755ebe728dSRoger Pau Monné * This segment was allocated using vm_page_array
11765ebe728dSRoger Pau Monné  * only; there's nothing to do since those pages
11775ebe728dSRoger Pau Monné * were never added to the tree.
11785ebe728dSRoger Pau Monné */
11795ebe728dSRoger Pau Monné return;
11805ebe728dSRoger Pau Monné }
11815ebe728dSRoger Pau Monné /*
11825ebe728dSRoger Pau Monné * We have a segment that starts inside
11835ebe728dSRoger Pau Monné * of vm_page_array, but ends outside of it.
11845ebe728dSRoger Pau Monné *
11855ebe728dSRoger Pau Monné * Calculate how many pages were added to the
11865ebe728dSRoger Pau Monné * tree and free them.
11875ebe728dSRoger Pau Monné */
11885ebe728dSRoger Pau Monné start = ptoa(first_page + vm_page_array_size);
11895ebe728dSRoger Pau Monné } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
11905ebe728dSRoger Pau Monné /*
11915ebe728dSRoger Pau Monné * We have a segment that ends inside of vm_page_array,
11925ebe728dSRoger Pau Monné * but starts outside of it.
11935ebe728dSRoger Pau Monné */
11945ebe728dSRoger Pau Monné end = ptoa(first_page);
11955ebe728dSRoger Pau Monné } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
11965ebe728dSRoger Pau Monné /* Since it's not possible to register such a range, panic. */
11975ebe728dSRoger Pau Monné panic(
11985ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]",
11995ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end);
12005ebe728dSRoger Pau Monné }
1201b6de32bdSKonstantin Belousov #endif
120238d6b2dcSRoger Pau Monné tmp.start = start;
120338d6b2dcSRoger Pau Monné tmp.end = 0;
1204b6de32bdSKonstantin Belousov
120538d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock);
120638d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
120738d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) {
120838d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock);
120938d6b2dcSRoger Pau Monné panic(
121038d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]",
121138d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end);
121238d6b2dcSRoger Pau Monné }
121338d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
121438d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock);
121538d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES);
121638d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES);
1217b6de32bdSKonstantin Belousov }
1218b6de32bdSKonstantin Belousov
121911752d88SAlan Cox /*
1220e3537f92SDoug Moore * Free a contiguous, power of two-sized set of physical pages.
12210078df5fSDoug Moore  * The pool argument determines the destination free page pool.
12228941dc44SAlan Cox *
12238941dc44SAlan Cox * The free page queues must be locked.
122411752d88SAlan Cox */
122511752d88SAlan Cox void
12260078df5fSDoug Moore vm_phys_free_pages(vm_page_t m, int pool, int order)
122711752d88SAlan Cox {
122811752d88SAlan Cox struct vm_freelist *fl;
122911752d88SAlan Cox struct vm_phys_seg *seg;
12305c1f2cc4SAlan Cox vm_paddr_t pa;
123111752d88SAlan Cox vm_page_t m_buddy;
123211752d88SAlan Cox
123311752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER,
12340078df5fSDoug Moore ("%s: page %p has unexpected order %d",
12350078df5fSDoug Moore __func__, m, m->order));
12360078df5fSDoug Moore KASSERT(vm_phys_pool_valid(pool),
12370078df5fSDoug Moore ("%s: unexpected pool param %d", __func__, pool));
123811752d88SAlan Cox KASSERT(order < VM_NFREEORDER,
12390078df5fSDoug Moore ("%s: order %d is out of range", __func__, order));
124011752d88SAlan Cox seg = &vm_phys_segs[m->segind];
1241e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
12425c1f2cc4SAlan Cox if (order < VM_NFREEORDER - 1) {
12435c1f2cc4SAlan Cox pa = VM_PAGE_TO_PHYS(m);
12445c1f2cc4SAlan Cox do {
12455c1f2cc4SAlan Cox pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
12465c1f2cc4SAlan Cox if (pa < seg->start || pa >= seg->end)
124711752d88SAlan Cox break;
124869cbb187SMark Johnston m_buddy = vm_phys_seg_paddr_to_vm_page(seg, pa);
124911752d88SAlan Cox if (m_buddy->order != order)
125011752d88SAlan Cox break;
125111752d88SAlan Cox fl = (*seg->free_queues)[m_buddy->pool];
12527e226537SAttilio Rao vm_freelist_rem(fl, m_buddy, order);
12530078df5fSDoug Moore vm_phys_finish_init(m_buddy, order);
125411752d88SAlan Cox order++;
12555c1f2cc4SAlan Cox pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
125669cbb187SMark Johnston m = vm_phys_seg_paddr_to_vm_page(seg, pa);
12575c1f2cc4SAlan Cox } while (order < VM_NFREEORDER - 1);
125811752d88SAlan Cox }
12590078df5fSDoug Moore fl = (*seg->free_queues)[pool];
12600078df5fSDoug Moore vm_freelist_add(fl, m, order, pool, 1);
126111752d88SAlan Cox }
126211752d88SAlan Cox
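/*
 * Worked example of the buddy computation in vm_phys_free_pages() above
 * (editor's sketch; assumes 4KB pages, i.e. PAGE_SHIFT == 12): freeing the
 * order 0 page at 0x5000, its buddy is 0x5000 ^ 0x1000 == 0x4000.  If that
 * buddy is itself a free order 0 block in the same segment, the pair merges
 * into an order 1 block at 0x5000 & ~0x1fff == 0x4000, whose order 1 buddy
 * is 0x4000 ^ 0x2000 == 0x6000, and so on until a buddy is missing, lies
 * outside the segment, or the maximum order is reached.
 */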
1263b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
1264b16b4c22SMark Johnston /*
1265b16b4c22SMark Johnston * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving
1266b16b4c22SMark Johnston * them to the default pool. This is a prerequisite for some rare operations
1267b16b4c22SMark Johnston * which need to scan the page array and thus depend on all pages being
1268b16b4c22SMark Johnston * initialized.
1269b16b4c22SMark Johnston */
1270b16b4c22SMark Johnston static void
1271b16b4c22SMark Johnston vm_phys_lazy_init_domain(int domain, bool locked)
1272b16b4c22SMark Johnston {
1273b16b4c22SMark Johnston static bool initdone[MAXMEMDOM];
1274b16b4c22SMark Johnston struct vm_domain *vmd;
1275b16b4c22SMark Johnston struct vm_freelist *fl;
1276b16b4c22SMark Johnston vm_page_t m;
1277b16b4c22SMark Johnston int pind;
1278b16b4c22SMark Johnston bool unlocked;
1279b16b4c22SMark Johnston
1280b16b4c22SMark Johnston if (__predict_true(atomic_load_bool(&initdone[domain])))
1281b16b4c22SMark Johnston return;
1282b16b4c22SMark Johnston
1283b16b4c22SMark Johnston vmd = VM_DOMAIN(domain);
1284b16b4c22SMark Johnston if (locked)
1285b16b4c22SMark Johnston vm_domain_free_assert_locked(vmd);
1286b16b4c22SMark Johnston else
1287b16b4c22SMark Johnston vm_domain_free_lock(vmd);
1288b16b4c22SMark Johnston if (atomic_load_bool(&initdone[domain]))
1289b16b4c22SMark Johnston goto out;
1290b16b4c22SMark Johnston pind = VM_FREEPOOL_LAZYINIT;
1291b16b4c22SMark Johnston for (int freelist = 0; freelist < VM_NFREELIST; freelist++) {
1292b16b4c22SMark Johnston int flind;
1293b16b4c22SMark Johnston
1294b16b4c22SMark Johnston flind = vm_freelist_to_flind[freelist];
1295b16b4c22SMark Johnston if (flind < 0)
1296b16b4c22SMark Johnston continue;
1297b16b4c22SMark Johnston fl = vm_phys_free_queues[domain][flind][pind];
1298b16b4c22SMark Johnston for (int oind = 0; oind < VM_NFREEORDER; oind++) {
1299b16b4c22SMark Johnston if (atomic_load_int(&fl[oind].lcnt) == 0)
1300b16b4c22SMark Johnston continue;
1301b16b4c22SMark Johnston while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
1302b16b4c22SMark Johnston /*
1303b16b4c22SMark Johnston * Avoid holding the lock across the
1304b16b4c22SMark Johnston * initialization unless there's a free page
1305b16b4c22SMark Johnston * shortage.
1306b16b4c22SMark Johnston */
1307b16b4c22SMark Johnston vm_freelist_rem(fl, m, oind);
1308b16b4c22SMark Johnston unlocked = vm_domain_allocate(vmd,
1309b16b4c22SMark Johnston VM_ALLOC_NORMAL, 1 << oind);
1310b16b4c22SMark Johnston if (unlocked)
1311b16b4c22SMark Johnston vm_domain_free_unlock(vmd);
13120078df5fSDoug Moore vm_phys_finish_init(m, oind);
1313b16b4c22SMark Johnston if (unlocked) {
1314b16b4c22SMark Johnston vm_domain_freecnt_inc(vmd, 1 << oind);
1315b16b4c22SMark Johnston vm_domain_free_lock(vmd);
1316b16b4c22SMark Johnston }
13170078df5fSDoug Moore vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT,
13180078df5fSDoug Moore oind);
1319b16b4c22SMark Johnston }
1320b16b4c22SMark Johnston }
1321b16b4c22SMark Johnston }
1322b16b4c22SMark Johnston atomic_store_bool(&initdone[domain], true);
1323b16b4c22SMark Johnston out:
1324b16b4c22SMark Johnston if (!locked)
1325b16b4c22SMark Johnston vm_domain_free_unlock(vmd);
1326b16b4c22SMark Johnston }
1327b16b4c22SMark Johnston
1328b16b4c22SMark Johnston static void
1329b16b4c22SMark Johnston vm_phys_lazy_init(void)
1330b16b4c22SMark Johnston {
1331b16b4c22SMark Johnston for (int domain = 0; domain < vm_ndomains; domain++)
1332b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false);
1333b16b4c22SMark Johnston atomic_store_int(&vm_default_freepool, VM_FREEPOOL_DEFAULT);
1334b16b4c22SMark Johnston }
1335b16b4c22SMark Johnston
1336b16b4c22SMark Johnston static void
1337b16b4c22SMark Johnston vm_phys_lazy_init_kthr(void *arg __unused)
1338b16b4c22SMark Johnston {
1339b16b4c22SMark Johnston vm_phys_lazy_init();
1340b16b4c22SMark Johnston kthread_exit();
1341b16b4c22SMark Johnston }
1342b16b4c22SMark Johnston
1343b16b4c22SMark Johnston static void
1344b16b4c22SMark Johnston vm_phys_lazy_sysinit(void *arg __unused)
1345b16b4c22SMark Johnston {
1346b16b4c22SMark Johnston struct thread *td;
1347b16b4c22SMark Johnston int error;
1348b16b4c22SMark Johnston
1349b16b4c22SMark Johnston error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td,
1350b16b4c22SMark Johnston RFSTOPPED, 0, "vmlazyinit");
1351b16b4c22SMark Johnston if (error == 0) {
1352b16b4c22SMark Johnston thread_lock(td);
1353b16b4c22SMark Johnston sched_prio(td, PRI_MIN_IDLE);
1354b16b4c22SMark Johnston sched_add(td, SRQ_BORING);
1355b16b4c22SMark Johnston } else {
1356b16b4c22SMark Johnston printf("%s: could not create lazy init thread: %d\n",
1357b16b4c22SMark Johnston __func__, error);
1358b16b4c22SMark Johnston vm_phys_lazy_init();
1359b16b4c22SMark Johnston }
1360b16b4c22SMark Johnston }
1361b16b4c22SMark Johnston SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit,
1362b16b4c22SMark Johnston NULL);
1363b16b4c22SMark Johnston #endif /* VM_FREEPOOL_LAZYINIT */
1364b16b4c22SMark Johnston
136511752d88SAlan Cox /*
1366e3537f92SDoug Moore * Free a contiguous, arbitrarily sized set of physical pages, without
13670078df5fSDoug Moore * merging across set boundaries. Assumes no pages have a valid pool field.
1368b8590daeSDoug Moore *
1369b8590daeSDoug Moore * The free page queues must be locked.
1370b8590daeSDoug Moore */
1371b8590daeSDoug Moore void
13720078df5fSDoug Moore vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages)
1373b8590daeSDoug Moore {
1374b8590daeSDoug Moore struct vm_freelist *fl;
1375b8590daeSDoug Moore struct vm_phys_seg *seg;
1376b8590daeSDoug Moore vm_page_t m_end;
1377c9b06fa5SDoug Moore vm_paddr_t diff, lo;
1378b8590daeSDoug Moore int order;
1379b8590daeSDoug Moore
1380b8590daeSDoug Moore /*
1381b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages in the largest
1382b8590daeSDoug Moore * possible power-of-two-sized subsets.
1383b8590daeSDoug Moore */
1384b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1385b8590daeSDoug Moore seg = &vm_phys_segs[m->segind];
13860078df5fSDoug Moore fl = (*seg->free_queues)[pool];
1387b8590daeSDoug Moore m_end = m + npages;
1388b8590daeSDoug Moore /* Free blocks of increasing size. */
13896dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m));
1390c9b06fa5SDoug Moore if (m < m_end &&
1391c9b06fa5SDoug Moore (diff = lo ^ (lo + npages - 1)) != 0) {
1392543d55d7SDoug Moore order = min(ilog2(diff), VM_NFREEORDER - 1);
13930078df5fSDoug Moore m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl,
13940078df5fSDoug Moore pool, 1);
13955c1f2cc4SAlan Cox }
1396c9b06fa5SDoug Moore
1397b8590daeSDoug Moore /* Free blocks of maximum size. */
1398c9b06fa5SDoug Moore order = VM_NFREEORDER - 1;
1399b8590daeSDoug Moore while (m + (1 << order) <= m_end) {
1400b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind],
1401b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments",
1402b8590daeSDoug Moore __func__, m_end - npages, m));
14030078df5fSDoug Moore vm_phys_enq_chunk(fl, m, order, pool, 1);
1404b8590daeSDoug Moore m += 1 << order;
1405b8590daeSDoug Moore }
1406b8590daeSDoug Moore /* Free blocks of diminishing size. */
14070078df5fSDoug Moore vm_phys_enq_beg(m, m_end - m, fl, pool, 1);
1408b8590daeSDoug Moore }
1409b8590daeSDoug Moore
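/*
 * Worked example of the decomposition performed by vm_phys_enqueue_contig()
 * above (editor's sketch; the page frame numbers are chosen for
 * illustration): enqueueing npages == 13 starting at page frame 3 yields
 * the largest possible naturally aligned power-of-two-sized blocks: one
 * page at frame 3 (order 0), four pages at frame 4 (order 2), and eight
 * pages at frame 8 (order 3), i.e. 1 + 4 + 8 == 13 pages with no block
 * crossing its own alignment boundary.
 */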
1410b8590daeSDoug Moore /*
1411b8590daeSDoug Moore * Free a contiguous, arbitrarily sized set of physical pages.
14120078df5fSDoug Moore * Assumes that every page but the first has no valid pool field.
14130078df5fSDoug Moore  * The pool argument determines the destination free page pool.
1414b8590daeSDoug Moore *
1415b8590daeSDoug Moore * The free page queues must be locked.
1416b8590daeSDoug Moore */
1417b8590daeSDoug Moore void
14180078df5fSDoug Moore vm_phys_free_contig(vm_page_t m, int pool, u_long npages)
1419b8590daeSDoug Moore {
14206dd15b7aSDoug Moore vm_paddr_t lo;
1421b8590daeSDoug Moore vm_page_t m_start, m_end;
14226dd15b7aSDoug Moore unsigned max_order, order_start, order_end;
1423b8590daeSDoug Moore
1424b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1425b8590daeSDoug Moore
14266dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m));
1427543d55d7SDoug Moore max_order = min(ilog2(lo ^ (lo + npages)), VM_NFREEORDER - 1);
1428e3537f92SDoug Moore
1429e3537f92SDoug Moore m_start = m;
14306dd15b7aSDoug Moore order_start = ffsll(lo) - 1;
14316dd15b7aSDoug Moore if (order_start < max_order)
1432b8590daeSDoug Moore m_start += 1 << order_start;
1433e3537f92SDoug Moore m_end = m + npages;
14346dd15b7aSDoug Moore order_end = ffsll(lo + npages) - 1;
14356dd15b7aSDoug Moore if (order_end < max_order)
1436b8590daeSDoug Moore m_end -= 1 << order_end;
1437b8590daeSDoug Moore /*
1438b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages at the start and
1439b8590daeSDoug Moore * end of the range last.
1440b8590daeSDoug Moore */
1441b8590daeSDoug Moore if (m_start < m_end)
14420078df5fSDoug Moore vm_phys_enqueue_contig(m_start, pool, m_end - m_start);
1443e3537f92SDoug Moore if (order_start < max_order)
14440078df5fSDoug Moore vm_phys_free_pages(m, pool, order_start);
1445e3537f92SDoug Moore if (order_end < max_order)
14460078df5fSDoug Moore vm_phys_free_pages(m_end, pool, order_end);
14475c1f2cc4SAlan Cox }
14485c1f2cc4SAlan Cox
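/*
 * Worked example of the split performed by vm_phys_free_contig() above
 * (editor's sketch): freeing npages == 6 starting at page frame 6 gives
 * lo == 6, max_order == ilog2(6 ^ 12) == 3, order_start == ffs(6) - 1 == 1
 * and order_end == ffs(12) - 1 == 2.  The interior range [8, 8) is empty,
 * so the call reduces to freeing an order 1 block at frame 6 and an order 2
 * block at frame 8; only those boundary blocks can coalesce with
 * neighboring free memory.
 */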
14495c1f2cc4SAlan Cox /*
14509e817428SDoug Moore * Identify the first address range within segment segind or greater
14519e817428SDoug Moore * that matches the domain, lies within the low/high range, and has
14529e817428SDoug Moore * enough pages. Return -1 if there is none.
1453c869e672SAlan Cox */
14549e817428SDoug Moore int
14559e817428SDoug Moore vm_phys_find_range(vm_page_t bounds[], int segind, int domain,
14569e817428SDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high)
1457c869e672SAlan Cox {
14589e817428SDoug Moore vm_paddr_t pa_end, pa_start;
14599e817428SDoug Moore struct vm_phys_seg *end_seg, *seg;
1460c869e672SAlan Cox
14619e817428SDoug Moore KASSERT(npages > 0, ("npages is zero"));
146258d42717SAlan Cox KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range"));
14639e817428SDoug Moore end_seg = &vm_phys_segs[vm_phys_nsegs];
14649e817428SDoug Moore for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) {
14653f289c3fSJeff Roberson if (seg->domain != domain)
14663f289c3fSJeff Roberson continue;
1467c869e672SAlan Cox if (seg->start >= high)
14689e817428SDoug Moore return (-1);
14699e817428SDoug Moore pa_start = MAX(low, seg->start);
14709e817428SDoug Moore pa_end = MIN(high, seg->end);
14719e817428SDoug Moore if (pa_end - pa_start < ptoa(npages))
1472c869e672SAlan Cox continue;
1473b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT
1474b16b4c22SMark Johnston /*
1475b16b4c22SMark Johnston * The pages on the free lists must be initialized.
1476b16b4c22SMark Johnston */
1477b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false);
1478b16b4c22SMark Johnston #endif
147969cbb187SMark Johnston bounds[0] = vm_phys_seg_paddr_to_vm_page(seg, pa_start);
1480fbff6d54SMark Johnston bounds[1] = &seg->first_page[atop(pa_end - seg->start)];
14819e817428SDoug Moore return (seg - vm_phys_segs);
1482c869e672SAlan Cox }
14839e817428SDoug Moore return (-1);
1484c869e672SAlan Cox }
1485c869e672SAlan Cox
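/*
 * Illustrative sketch (editor's addition): callers typically walk all
 * matching ranges by restarting the search at the next segment index until
 * vm_phys_find_range() returns -1.  The loop body and the surrounding
 * variable declarations are assumptions about the caller, not part of this
 * file.
 *
 *	for (segind = 0; (segind = vm_phys_find_range(bounds, segind,
 *	    domain, npages, low, high)) != -1; segind++) {
 *		... scan the pages in [bounds[0], bounds[1]) ...
 *	}
 */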
1486c869e672SAlan Cox /*
14879742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search
14886062d9faSMark Johnston * succeeds, remove "m" from the free lists and return true. Otherwise, return
14896062d9faSMark Johnston * false, indicating that "m" is not in the free lists.
14907bfda801SAlan Cox *
14917bfda801SAlan Cox * The free page queues must be locked.
14927bfda801SAlan Cox */
14936062d9faSMark Johnston bool
1494b16b4c22SMark Johnston vm_phys_unfree_page(vm_paddr_t pa)
14957bfda801SAlan Cox {
14967bfda801SAlan Cox struct vm_freelist *fl;
14977bfda801SAlan Cox struct vm_phys_seg *seg;
1498b16b4c22SMark Johnston vm_paddr_t pa_half;
1499b16b4c22SMark Johnston vm_page_t m, m_set, m_tmp;
15000078df5fSDoug Moore int order, pool;
15017bfda801SAlan Cox
1502b16b4c22SMark Johnston seg = vm_phys_paddr_to_seg(pa);
1503b16b4c22SMark Johnston vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
1504b16b4c22SMark Johnston
15050078df5fSDoug Moore #ifdef VM_FREEPOOL_LAZYINIT
1506b16b4c22SMark Johnston /*
1507b16b4c22SMark Johnston * The pages on the free lists must be initialized.
1508b16b4c22SMark Johnston */
1509b16b4c22SMark Johnston vm_phys_lazy_init_domain(seg->domain, true);
1510b16b4c22SMark Johnston #endif
1511b16b4c22SMark Johnston
15127bfda801SAlan Cox /*
15137bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free
15147bfda801SAlan Cox * physical pages containing the given physical page "m" and
15157bfda801SAlan Cox * assign it to "m_set".
15167bfda801SAlan Cox */
1517b16b4c22SMark Johnston m = vm_phys_paddr_to_vm_page(pa);
15187bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
1519bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) {
15207bfda801SAlan Cox order++;
15217bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
15222fbced65SAlan Cox if (pa >= seg->start)
152369cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa);
1524e35395ceSAlan Cox else
15256062d9faSMark Johnston return (false);
15267bfda801SAlan Cox }
1527e35395ceSAlan Cox if (m_set->order < order)
15286062d9faSMark Johnston return (false);
1529e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER)
15306062d9faSMark Johnston return (false);
15317bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER,
15327bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d",
15337bfda801SAlan Cox m_set, m_set->order));
15347bfda801SAlan Cox
15357bfda801SAlan Cox /*
15367bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract
15377bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set"
15387bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half
15397bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists.
15407bfda801SAlan Cox */
15410078df5fSDoug Moore pool = m_set->pool;
15420078df5fSDoug Moore fl = (*seg->free_queues)[pool];
15437bfda801SAlan Cox order = m_set->order;
15447e226537SAttilio Rao vm_freelist_rem(fl, m_set, order);
15457bfda801SAlan Cox while (order > 0) {
15467bfda801SAlan Cox order--;
15477bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
15487bfda801SAlan Cox if (m->phys_addr < pa_half)
154969cbb187SMark Johnston m_tmp = vm_phys_seg_paddr_to_vm_page(seg, pa_half);
15507bfda801SAlan Cox else {
15517bfda801SAlan Cox m_tmp = m_set;
155269cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa_half);
15537bfda801SAlan Cox }
15540078df5fSDoug Moore vm_freelist_add(fl, m_tmp, order, pool, 0);
15557bfda801SAlan Cox }
15567bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
15576062d9faSMark Johnston return (true);
15587bfda801SAlan Cox }
15597bfda801SAlan Cox
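/*
 * Worked example of the extraction performed by vm_phys_unfree_page()
 * above (editor's sketch; assumes 4KB pages): suppose the page at 0x6000
 * lies within a free order 2 block starting at 0x4000.  The block is
 * removed from its free list and split twice: at order 1, pa_half ==
 * 0x4000 ^ 0x2000 == 0x6000, so the half at 0x4000 is returned to the free
 * lists and the search continues in the half at 0x6000; at order 0,
 * pa_half == 0x6000 ^ 0x1000 == 0x7000, so the page at 0x7000 is returned
 * and only the requested page at 0x6000 remains removed from the free
 * lists.
 */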
15607bfda801SAlan Cox /*
15612a4897bdSDoug Moore * Find a run of contiguous physical pages, meeting alignment requirements, from
15622a4897bdSDoug Moore * a list of max-sized page blocks, where we need at least two consecutive
15632a4897bdSDoug Moore * blocks to satisfy the (large) page request.
1564fa8a6585SDoug Moore */
1565fa8a6585SDoug Moore static vm_page_t
15662a4897bdSDoug Moore vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
1567fa8a6585SDoug Moore vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
1568fa8a6585SDoug Moore {
1569fa8a6585SDoug Moore struct vm_phys_seg *seg;
15702a4897bdSDoug Moore vm_page_t m, m_iter, m_ret;
15712a4897bdSDoug Moore vm_paddr_t max_size, size;
15722a4897bdSDoug Moore int max_order;
1573fa8a6585SDoug Moore
15742a4897bdSDoug Moore max_order = VM_NFREEORDER - 1;
1575fa8a6585SDoug Moore size = npages << PAGE_SHIFT;
15762a4897bdSDoug Moore max_size = (vm_paddr_t)1 << (PAGE_SHIFT + max_order);
15772a4897bdSDoug Moore KASSERT(size > max_size, ("size is too small"));
15782a4897bdSDoug Moore
1579fa8a6585SDoug Moore /*
15802a4897bdSDoug Moore * In order to avoid examining any free max-sized page block more than
15812a4897bdSDoug Moore * twice, identify the ones that are first in a physically-contiguous
15822a4897bdSDoug Moore * sequence of such blocks, and only for those walk the sequence to
15832a4897bdSDoug Moore * check if there are enough free blocks starting at a properly aligned
15842a4897bdSDoug Moore * block. Thus, no block is checked for free-ness more than twice.
1585fa8a6585SDoug Moore */
15862a4897bdSDoug Moore TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
15872a4897bdSDoug Moore /*
15882a4897bdSDoug Moore * Skip m unless it is first in a sequence of free max page
15892a4897bdSDoug Moore * blocks >= low in its segment.
15902a4897bdSDoug Moore */
15912a4897bdSDoug Moore seg = &vm_phys_segs[m->segind];
15922a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) < MAX(low, seg->start))
15932a4897bdSDoug Moore continue;
15942a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) >= max_size &&
15952a4897bdSDoug Moore VM_PAGE_TO_PHYS(m) - max_size >= MAX(low, seg->start) &&
15962a4897bdSDoug Moore max_order == m[-1 << max_order].order)
1597fa8a6585SDoug Moore continue;
1598fa8a6585SDoug Moore
1599fa8a6585SDoug Moore /*
16002a4897bdSDoug Moore * Advance m_ret from m to the first of the sequence, if any,
16012a4897bdSDoug Moore * that satisfies alignment conditions and might leave enough
16022a4897bdSDoug Moore * space.
1603fa8a6585SDoug Moore */
16042a4897bdSDoug Moore m_ret = m;
16052a4897bdSDoug Moore while (!vm_addr_ok(VM_PAGE_TO_PHYS(m_ret),
16062a4897bdSDoug Moore size, alignment, boundary) &&
16072a4897bdSDoug Moore VM_PAGE_TO_PHYS(m_ret) + size <= MIN(high, seg->end) &&
16082a4897bdSDoug Moore max_order == m_ret[1 << max_order].order)
16092a4897bdSDoug Moore m_ret += 1 << max_order;
16102a4897bdSDoug Moore
16112a4897bdSDoug Moore /*
16122a4897bdSDoug Moore * Skip m unless some block m_ret in the sequence is properly
16132a4897bdSDoug Moore * aligned, and begins a sequence of enough pages less than
16142a4897bdSDoug Moore * high, and in the same segment.
16152a4897bdSDoug Moore */
16162a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m_ret) + size > MIN(high, seg->end))
1617fa8a6585SDoug Moore continue;
1618fa8a6585SDoug Moore
1619fa8a6585SDoug Moore /*
16202a4897bdSDoug Moore * Skip m unless the blocks to allocate starting at m_ret are
16212a4897bdSDoug Moore * all free.
1622fa8a6585SDoug Moore */
16232a4897bdSDoug Moore for (m_iter = m_ret;
16242a4897bdSDoug Moore m_iter < m_ret + npages && max_order == m_iter->order;
16252a4897bdSDoug Moore m_iter += 1 << max_order) {
1626fa8a6585SDoug Moore }
16272a4897bdSDoug Moore if (m_iter < m_ret + npages)
1628fa8a6585SDoug Moore continue;
1629fa8a6585SDoug Moore return (m_ret);
1630fa8a6585SDoug Moore }
1631fa8a6585SDoug Moore return (NULL);
1632fa8a6585SDoug Moore }
1633fa8a6585SDoug Moore
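/*
 * Editor's note with example numbers (assumes 4KB pages and a maximum
 * order of 12, i.e. 4096-page blocks): vm_phys_find_freelist_contig() is
 * only reached when the request exceeds a single max-sized block, e.g.
 * npages == 6144 (24MB), in which case at least two physically consecutive
 * free 16MB blocks must be found, one of which provides a suitably aligned
 * starting address for the run.
 */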
1634fa8a6585SDoug Moore /*
1635fa8a6585SDoug Moore * Find a run of contiguous physical pages from the specified free list
1636342056faSDoug Moore * table.
1637c869e672SAlan Cox */
1638c869e672SAlan Cox static vm_page_t
1639fa8a6585SDoug Moore vm_phys_find_queues_contig(
1640342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX],
1641342056faSDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high,
1642342056faSDoug Moore u_long alignment, vm_paddr_t boundary)
1643c869e672SAlan Cox {
1644c869e672SAlan Cox struct vm_freelist *fl;
1645fa8a6585SDoug Moore vm_page_t m_ret;
1646c869e672SAlan Cox vm_paddr_t pa, pa_end, size;
1647c869e672SAlan Cox int oind, order, pind;
1648c869e672SAlan Cox
1649c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0"));
1650c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1651c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1652c869e672SAlan Cox /* Compute the queue that is the best fit for npages. */
16539161b4deSAlan Cox order = flsl(npages - 1);
1654fa8a6585SDoug Moore /* Search for a large enough free block. */
1655c869e672SAlan Cox size = npages << PAGE_SHIFT;
1656fa8a6585SDoug Moore for (oind = order; oind < VM_NFREEORDER; oind++) {
1657b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
1658342056faSDoug Moore fl = (*queues)[pind];
16595cd29d0fSMark Johnston TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
1660c869e672SAlan Cox /*
1661da92ecbcSDoug Moore * Determine if the address range starting at pa
1662da92ecbcSDoug Moore * is within the given range, satisfies the
1663da92ecbcSDoug Moore * given alignment, and does not cross the given
1664da92ecbcSDoug Moore * boundary.
166511752d88SAlan Cox */
1666da92ecbcSDoug Moore pa = VM_PAGE_TO_PHYS(m_ret);
1667da92ecbcSDoug Moore pa_end = pa + size;
1668fa8a6585SDoug Moore if (low <= pa && pa_end <= high &&
1669fa8a6585SDoug Moore vm_addr_ok(pa, size, alignment, boundary))
1670fa8a6585SDoug Moore return (m_ret);
1671fa8a6585SDoug Moore }
1672fa8a6585SDoug Moore }
1673fa8a6585SDoug Moore }
1674da92ecbcSDoug Moore if (order < VM_NFREEORDER)
1675fa8a6585SDoug Moore return (NULL);
16762a4897bdSDoug Moore /* Search for a long-enough sequence of max-order blocks. */
1677b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
1678fa8a6585SDoug Moore fl = (*queues)[pind];
16792a4897bdSDoug Moore m_ret = vm_phys_find_freelist_contig(fl, npages,
1680fa8a6585SDoug Moore low, high, alignment, boundary);
1681fa8a6585SDoug Moore if (m_ret != NULL)
1682fa8a6585SDoug Moore return (m_ret);
168311752d88SAlan Cox }
168411752d88SAlan Cox return (NULL);
168511752d88SAlan Cox }
168611752d88SAlan Cox
1687b7565d44SJeff Roberson /*
1688342056faSDoug Moore * Allocate a contiguous set of physical pages of the given size
1689342056faSDoug Moore * "npages" from the free lists. All of the physical pages must be at
1690342056faSDoug Moore * or above the given physical address "low" and below the given
1691342056faSDoug Moore * physical address "high". The given value "alignment" determines the
1692342056faSDoug Moore * alignment of the first physical page in the set. If the given value
1693342056faSDoug Moore * "boundary" is non-zero, then the set of physical pages cannot cross
1694342056faSDoug Moore * any physical address boundary that is a multiple of that value. Both
16950078df5fSDoug Moore * "alignment" and "boundary" must be a power of two. Sets the pool
16960078df5fSDoug Moore * field to DEFAULT in the first allocated page.
1697342056faSDoug Moore */
1698342056faSDoug Moore vm_page_t
1699342056faSDoug Moore vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
1700342056faSDoug Moore u_long alignment, vm_paddr_t boundary)
1701342056faSDoug Moore {
1702342056faSDoug Moore vm_paddr_t pa_end, pa_start;
1703fa8a6585SDoug Moore struct vm_freelist *fl;
1704fa8a6585SDoug Moore vm_page_t m, m_run;
1705342056faSDoug Moore struct vm_phys_seg *seg;
1706342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX];
1707fa8a6585SDoug Moore int oind, segind;
1708342056faSDoug Moore
1709342056faSDoug Moore KASSERT(npages > 0, ("npages is 0"));
1710342056faSDoug Moore KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1711342056faSDoug Moore KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1712342056faSDoug Moore vm_domain_free_assert_locked(VM_DOMAIN(domain));
1713342056faSDoug Moore if (low >= high)
1714342056faSDoug Moore return (NULL);
1715342056faSDoug Moore queues = NULL;
1716342056faSDoug Moore m_run = NULL;
1717342056faSDoug Moore for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
1718342056faSDoug Moore seg = &vm_phys_segs[segind];
1719342056faSDoug Moore if (seg->start >= high || seg->domain != domain)
1720342056faSDoug Moore continue;
1721342056faSDoug Moore if (low >= seg->end)
1722342056faSDoug Moore break;
1723342056faSDoug Moore if (low <= seg->start)
1724342056faSDoug Moore pa_start = seg->start;
1725342056faSDoug Moore else
1726342056faSDoug Moore pa_start = low;
1727342056faSDoug Moore if (high < seg->end)
1728342056faSDoug Moore pa_end = high;
1729342056faSDoug Moore else
1730342056faSDoug Moore pa_end = seg->end;
1731342056faSDoug Moore if (pa_end - pa_start < ptoa(npages))
1732342056faSDoug Moore continue;
1733342056faSDoug Moore /*
1734342056faSDoug Moore * If a previous segment led to a search using
1735342056faSDoug Moore * the same free lists as would this segment, then
1736342056faSDoug Moore * we've actually already searched within this
1737342056faSDoug Moore * too. So skip it.
1738342056faSDoug Moore */
1739342056faSDoug Moore if (seg->free_queues == queues)
1740342056faSDoug Moore continue;
1741342056faSDoug Moore queues = seg->free_queues;
1742fa8a6585SDoug Moore m_run = vm_phys_find_queues_contig(queues, npages,
1743342056faSDoug Moore low, high, alignment, boundary);
1744342056faSDoug Moore if (m_run != NULL)
1745342056faSDoug Moore break;
1746342056faSDoug Moore }
1747fa8a6585SDoug Moore if (m_run == NULL)
1748fa8a6585SDoug Moore return (NULL);
1749fa8a6585SDoug Moore
1750fa8a6585SDoug Moore /* Allocate pages from the page-range found. */
1751fa8a6585SDoug Moore for (m = m_run; m < &m_run[npages]; m = &m[1 << oind]) {
1752fa8a6585SDoug Moore fl = (*queues)[m->pool];
1753fa8a6585SDoug Moore oind = m->order;
1754fa8a6585SDoug Moore vm_freelist_rem(fl, m, oind);
17550078df5fSDoug Moore vm_phys_finish_init(m, oind);
1756fa8a6585SDoug Moore }
1757fa8a6585SDoug Moore /* Return excess pages to the free lists. */
1758fa8a6585SDoug Moore fl = (*queues)[VM_FREEPOOL_DEFAULT];
17590078df5fSDoug Moore vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl,
17600078df5fSDoug Moore VM_FREEPOOL_DEFAULT, 0);
17612a4897bdSDoug Moore
17622a4897bdSDoug Moore /* Return page verified to satisfy conditions of request. */
17632a4897bdSDoug Moore pa_start = VM_PAGE_TO_PHYS(m_run);
17642a4897bdSDoug Moore KASSERT(low <= pa_start,
17652a4897bdSDoug Moore ("memory allocated below minimum requested range"));
17662a4897bdSDoug Moore KASSERT(pa_start + ptoa(npages) <= high,
17672a4897bdSDoug Moore ("memory allocated above maximum requested range"));
17682a4897bdSDoug Moore seg = &vm_phys_segs[m_run->segind];
17692a4897bdSDoug Moore KASSERT(seg->domain == domain,
17702a4897bdSDoug Moore ("memory not allocated from specified domain"));
17712a4897bdSDoug Moore KASSERT(vm_addr_ok(pa_start, ptoa(npages), alignment, boundary),
17722a4897bdSDoug Moore ("memory alignment/boundary constraints not satisfied"));
1773342056faSDoug Moore return (m_run);
1774342056faSDoug Moore }
1775342056faSDoug Moore
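/*
 * Illustrative sketch (editor's addition): a minimal call pattern for
 * vm_phys_alloc_contig(), allocating a physically contiguous, 2MB-aligned
 * run below 4GB from one domain.  A real caller must also reserve the
 * pages against the domain's free count (e.g. via vm_domain_allocate())
 * and initialize them; this fragment only illustrates the locking and the
 * parameters.
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	m = vm_phys_alloc_contig(domain, npages, 0, (vm_paddr_t)1 << 32,
 *	    2 * 1024 * 1024, 0);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 *	if (m == NULL)
 *		... fall back to reclamation or another domain ...
 */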
1776342056faSDoug Moore /*
1777b7565d44SJeff Roberson  * Return the index of the first unused slot, which may be the terminating
1778b7565d44SJeff Roberson * entry.
1779b7565d44SJeff Roberson */
1780b7565d44SJeff Roberson static int
1781b7565d44SJeff Roberson vm_phys_avail_count(void)
1782b7565d44SJeff Roberson {
1783b7565d44SJeff Roberson int i;
1784b7565d44SJeff Roberson
1785291b7bf0SOlivier Certner for (i = 0; i < PHYS_AVAIL_COUNT; i += 2)
1786291b7bf0SOlivier Certner if (phys_avail[i] == 0 && phys_avail[i + 1] == 0)
1787b7565d44SJeff Roberson return (i);
1788291b7bf0SOlivier Certner panic("Improperly terminated phys_avail[]");
1789b7565d44SJeff Roberson }
1790b7565d44SJeff Roberson
1791b7565d44SJeff Roberson /*
1792b7565d44SJeff Roberson * Assert that a phys_avail entry is valid.
1793b7565d44SJeff Roberson */
1794b7565d44SJeff Roberson static void
1795b7565d44SJeff Roberson vm_phys_avail_check(int i)
1796b7565d44SJeff Roberson {
1797125ef4e0SOlivier Certner if (i % 2 != 0)
1798125ef4e0SOlivier Certner panic("Chunk start index %d is not even.", i);
1799b7565d44SJeff Roberson if (phys_avail[i] & PAGE_MASK)
1800b7565d44SJeff Roberson panic("Unaligned phys_avail[%d]: %#jx", i,
1801b7565d44SJeff Roberson (intmax_t)phys_avail[i]);
1802b7565d44SJeff Roberson if (phys_avail[i + 1] & PAGE_MASK)
1803b7565d44SJeff Roberson panic("Unaligned phys_avail[%d + 1]: %#jx", i,
1804125ef4e0SOlivier Certner (intmax_t)phys_avail[i + 1]);
1805b7565d44SJeff Roberson if (phys_avail[i + 1] < phys_avail[i])
1806125ef4e0SOlivier Certner panic("phys_avail[%d]: start %#jx > end %#jx", i,
1807b7565d44SJeff Roberson (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]);
1808b7565d44SJeff Roberson }
1809b7565d44SJeff Roberson
1810b7565d44SJeff Roberson /*
1811b7565d44SJeff Roberson * Return the index of an overlapping phys_avail entry or -1.
1812b7565d44SJeff Roberson */
1813be3f5f29SJeff Roberson #ifdef NUMA
1814b7565d44SJeff Roberson static int
1815b7565d44SJeff Roberson vm_phys_avail_find(vm_paddr_t pa)
1816b7565d44SJeff Roberson {
1817b7565d44SJeff Roberson int i;
1818b7565d44SJeff Roberson
1819b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2)
1820b7565d44SJeff Roberson if (phys_avail[i] <= pa && phys_avail[i + 1] > pa)
1821b7565d44SJeff Roberson return (i);
1822b7565d44SJeff Roberson return (-1);
1823b7565d44SJeff Roberson }
1824be3f5f29SJeff Roberson #endif
1825b7565d44SJeff Roberson
1826b7565d44SJeff Roberson /*
1827b7565d44SJeff Roberson * Return the index of the largest entry.
1828b7565d44SJeff Roberson */
1829b7565d44SJeff Roberson int
1830b7565d44SJeff Roberson vm_phys_avail_largest(void)
1831b7565d44SJeff Roberson {
1832b7565d44SJeff Roberson vm_paddr_t sz, largesz;
1833b7565d44SJeff Roberson int largest;
1834b7565d44SJeff Roberson int i;
1835b7565d44SJeff Roberson
1836b7565d44SJeff Roberson largest = 0;
1837b7565d44SJeff Roberson largesz = 0;
1838b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) {
1839b7565d44SJeff Roberson sz = vm_phys_avail_size(i);
1840b7565d44SJeff Roberson if (sz > largesz) {
1841b7565d44SJeff Roberson largesz = sz;
1842b7565d44SJeff Roberson largest = i;
1843b7565d44SJeff Roberson }
1844b7565d44SJeff Roberson }
1845b7565d44SJeff Roberson
1846b7565d44SJeff Roberson return (largest);
1847b7565d44SJeff Roberson }
1848b7565d44SJeff Roberson
1849b7565d44SJeff Roberson vm_paddr_t
1850b7565d44SJeff Roberson vm_phys_avail_size(int i)
1851b7565d44SJeff Roberson {
1852b7565d44SJeff Roberson
1853b7565d44SJeff Roberson return (phys_avail[i + 1] - phys_avail[i]);
1854b7565d44SJeff Roberson }
1855b7565d44SJeff Roberson
1856b7565d44SJeff Roberson /*
1857e1499bffSOlivier Certner * Split a chunk in phys_avail[] at the address 'pa'.
1858e1499bffSOlivier Certner *
1859e1499bffSOlivier Certner * 'pa' must be within a chunk (slots i and i + 1) or one of its boundaries.
1860e1499bffSOlivier Certner * Returns zero on actual split, in which case the two new chunks occupy slots
1861e1499bffSOlivier Certner * i to i + 3, else EJUSTRETURN if 'pa' was one of the boundaries (and no split
1862e1499bffSOlivier Certner * actually occurred) else ENOSPC if there are not enough slots in phys_avail[]
1863e1499bffSOlivier Certner * to represent the additional chunk caused by the split.
1864b7565d44SJeff Roberson */
1865b7565d44SJeff Roberson static int
1866b7565d44SJeff Roberson vm_phys_avail_split(vm_paddr_t pa, int i)
1867b7565d44SJeff Roberson {
1868b7565d44SJeff Roberson int cnt;
1869b7565d44SJeff Roberson
1870b7565d44SJeff Roberson vm_phys_avail_check(i);
1871e1499bffSOlivier Certner if (pa < phys_avail[i] || pa > phys_avail[i + 1])
1872e1499bffSOlivier Certner panic("%s: Address %#jx not in range at slot %d [%#jx;%#jx].",
1873e1499bffSOlivier Certner __func__, (uintmax_t)pa, i,
1874e1499bffSOlivier Certner (uintmax_t)phys_avail[i], (uintmax_t)phys_avail[i + 1]);
1875e1499bffSOlivier Certner if (pa == phys_avail[i] || pa == phys_avail[i + 1])
1876e1499bffSOlivier Certner return (EJUSTRETURN);
1877b7565d44SJeff Roberson cnt = vm_phys_avail_count();
1878b7565d44SJeff Roberson if (cnt >= PHYS_AVAIL_ENTRIES)
1879b7565d44SJeff Roberson return (ENOSPC);
1880b7565d44SJeff Roberson memmove(&phys_avail[i + 2], &phys_avail[i],
1881b7565d44SJeff Roberson (cnt - i) * sizeof(phys_avail[0]));
1882b7565d44SJeff Roberson phys_avail[i + 1] = pa;
1883b7565d44SJeff Roberson phys_avail[i + 2] = pa;
1884b7565d44SJeff Roberson vm_phys_avail_check(i);
1885b7565d44SJeff Roberson vm_phys_avail_check(i+2);
1886b7565d44SJeff Roberson
1887b7565d44SJeff Roberson return (0);
1888b7565d44SJeff Roberson }
1889b7565d44SJeff Roberson
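/*
 * Worked example of vm_phys_avail_split() above (editor's sketch): with
 * phys_avail[] == { 0x1000, 0x9000, 0, 0 }, a call with pa == 0x4000 and
 * i == 0 shifts the array and yields { 0x1000, 0x4000, 0x4000, 0x9000, 0,
 * 0 }: the chunk in slots 0-1 now ends at 0x4000 and the new chunk in
 * slots 2-3 begins there.  Passing 0x1000 or 0x9000 instead returns
 * EJUSTRETURN without modifying the array.
 */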
189031991a5aSMitchell Horne /*
189131991a5aSMitchell Horne * Check if a given physical address can be included as part of a crash dump.
189231991a5aSMitchell Horne */
189331991a5aSMitchell Horne bool
189431991a5aSMitchell Horne vm_phys_is_dumpable(vm_paddr_t pa)
189531991a5aSMitchell Horne {
189631991a5aSMitchell Horne vm_page_t m;
189731991a5aSMitchell Horne int i;
189831991a5aSMitchell Horne
189931991a5aSMitchell Horne if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
190031991a5aSMitchell Horne return ((m->flags & PG_NODUMP) == 0);
190131991a5aSMitchell Horne
190231991a5aSMitchell Horne for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
190331991a5aSMitchell Horne if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
190431991a5aSMitchell Horne return (true);
190531991a5aSMitchell Horne }
190631991a5aSMitchell Horne return (false);
190731991a5aSMitchell Horne }
190831991a5aSMitchell Horne
190981302f1dSMark Johnston void
191081302f1dSMark Johnston vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
191181302f1dSMark Johnston {
191281302f1dSMark Johnston struct vm_phys_seg *seg;
191381302f1dSMark Johnston
191481302f1dSMark Johnston if (vm_phys_early_nsegs == -1)
191581302f1dSMark Johnston panic("%s: called after initialization", __func__);
191681302f1dSMark Johnston if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
191781302f1dSMark Johnston panic("%s: ran out of early segments", __func__);
191881302f1dSMark Johnston
191981302f1dSMark Johnston seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
192081302f1dSMark Johnston seg->start = start;
192181302f1dSMark Johnston seg->end = end;
192281302f1dSMark Johnston }
192381302f1dSMark Johnston
1924b7565d44SJeff Roberson /*
1925b7565d44SJeff Roberson * This routine allocates NUMA node specific memory before the page
1926b7565d44SJeff Roberson * allocator is bootstrapped.
1927b7565d44SJeff Roberson */
1928b7565d44SJeff Roberson vm_paddr_t
1929b7565d44SJeff Roberson vm_phys_early_alloc(int domain, size_t alloc_size)
1930b7565d44SJeff Roberson {
19312e7838aeSJohn Baldwin #ifdef NUMA
19322e7838aeSJohn Baldwin int mem_index;
19332e7838aeSJohn Baldwin #endif
19342e7838aeSJohn Baldwin int i, biggestone;
1935b7565d44SJeff Roberson vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
1936b7565d44SJeff Roberson
193781302f1dSMark Johnston KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
193881302f1dSMark Johnston ("%s: invalid domain index %d", __func__, domain));
1939b7565d44SJeff Roberson
1940b7565d44SJeff Roberson /*
1941b7565d44SJeff Roberson * Search the mem_affinity array for the biggest address
1942b7565d44SJeff Roberson * range in the desired domain. This is used to constrain
1943b7565d44SJeff Roberson * the phys_avail selection below.
1944b7565d44SJeff Roberson */
1945b7565d44SJeff Roberson biggestsize = 0;
1946b7565d44SJeff Roberson mem_start = 0;
1947b7565d44SJeff Roberson mem_end = -1;
1948b7565d44SJeff Roberson #ifdef NUMA
19492e7838aeSJohn Baldwin mem_index = 0;
1950b7565d44SJeff Roberson if (mem_affinity != NULL) {
1951b7565d44SJeff Roberson for (i = 0;; i++) {
1952b7565d44SJeff Roberson size = mem_affinity[i].end - mem_affinity[i].start;
1953b7565d44SJeff Roberson if (size == 0)
1954b7565d44SJeff Roberson break;
195581302f1dSMark Johnston if (domain != -1 && mem_affinity[i].domain != domain)
1956b7565d44SJeff Roberson continue;
1957b7565d44SJeff Roberson if (size > biggestsize) {
1958b7565d44SJeff Roberson mem_index = i;
1959b7565d44SJeff Roberson biggestsize = size;
1960b7565d44SJeff Roberson }
1961b7565d44SJeff Roberson }
1962b7565d44SJeff Roberson mem_start = mem_affinity[mem_index].start;
1963b7565d44SJeff Roberson mem_end = mem_affinity[mem_index].end;
1964b7565d44SJeff Roberson }
1965b7565d44SJeff Roberson #endif
1966b7565d44SJeff Roberson
1967b7565d44SJeff Roberson /*
1968b7565d44SJeff Roberson  * Now find the biggest physical segment within the desired
1969b7565d44SJeff Roberson  * NUMA domain.
1970b7565d44SJeff Roberson */
1971b7565d44SJeff Roberson biggestsize = 0;
1972b7565d44SJeff Roberson biggestone = 0;
1973b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1974b7565d44SJeff Roberson /* skip regions that are out of range */
1975b7565d44SJeff Roberson if (phys_avail[i+1] - alloc_size < mem_start ||
1976b7565d44SJeff Roberson phys_avail[i+1] > mem_end)
1977b7565d44SJeff Roberson continue;
1978b7565d44SJeff Roberson size = vm_phys_avail_size(i);
1979b7565d44SJeff Roberson if (size > biggestsize) {
1980b7565d44SJeff Roberson biggestone = i;
1981b7565d44SJeff Roberson biggestsize = size;
1982b7565d44SJeff Roberson }
1983b7565d44SJeff Roberson }
1984b7565d44SJeff Roberson alloc_size = round_page(alloc_size);
1985b7565d44SJeff Roberson
1986b7565d44SJeff Roberson /*
1987b7565d44SJeff Roberson * Grab single pages from the front to reduce fragmentation.
1988b7565d44SJeff Roberson */
1989b7565d44SJeff Roberson if (alloc_size == PAGE_SIZE) {
1990b7565d44SJeff Roberson pa = phys_avail[biggestone];
1991b7565d44SJeff Roberson phys_avail[biggestone] += PAGE_SIZE;
1992b7565d44SJeff Roberson vm_phys_avail_check(biggestone);
1993b7565d44SJeff Roberson return (pa);
1994b7565d44SJeff Roberson }
1995b7565d44SJeff Roberson
1996b7565d44SJeff Roberson /*
1997b7565d44SJeff Roberson * Naturally align large allocations.
1998b7565d44SJeff Roberson */
1999b7565d44SJeff Roberson align = phys_avail[biggestone + 1] & (alloc_size - 1);
2000b7565d44SJeff Roberson if (alloc_size + align > biggestsize)
2001b7565d44SJeff Roberson panic("cannot find a large enough size\n");
2002b7565d44SJeff Roberson if (align != 0 &&
2003b7565d44SJeff Roberson vm_phys_avail_split(phys_avail[biggestone + 1] - align,
2004b7565d44SJeff Roberson biggestone) != 0)
2005b7565d44SJeff Roberson /* Wasting memory. */
2006b7565d44SJeff Roberson phys_avail[biggestone + 1] -= align;
2007b7565d44SJeff Roberson
2008b7565d44SJeff Roberson phys_avail[biggestone + 1] -= alloc_size;
2009b7565d44SJeff Roberson vm_phys_avail_check(biggestone);
2010b7565d44SJeff Roberson pa = phys_avail[biggestone + 1];
2011b7565d44SJeff Roberson return (pa);
2012b7565d44SJeff Roberson }
2013b7565d44SJeff Roberson
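/*
 * Worked example of the natural-alignment logic in vm_phys_early_alloc()
 * above (editor's sketch; the addresses are chosen for illustration): if
 * the selected chunk ends at 0x12345000 and alloc_size is 2MB, then
 * align == 0x12345000 & 0x1fffff == 0x145000.  That tail is either split
 * off as its own chunk or, if no slot is available, simply discarded, so
 * the chunk ends at 0x12200000; taking alloc_size from the end then
 * returns pa == 0x12200000 - 0x200000 == 0x12000000, which is 2MB aligned.
 */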
2014b7565d44SJeff Roberson void
2015b7565d44SJeff Roberson vm_phys_early_startup(void)
2016b7565d44SJeff Roberson {
201781302f1dSMark Johnston struct vm_phys_seg *seg;
2018b7565d44SJeff Roberson int i;
2019b7565d44SJeff Roberson
2020*32e77bcdSOlivier Certner if (phys_avail[1] == 0)
2021*32e77bcdSOlivier Certner panic("phys_avail[] is empty");
2022*32e77bcdSOlivier Certner
2023b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2024b7565d44SJeff Roberson phys_avail[i] = round_page(phys_avail[i]);
2025b7565d44SJeff Roberson phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
2026b7565d44SJeff Roberson }
2027b7565d44SJeff Roberson
202881302f1dSMark Johnston for (i = 0; i < vm_phys_early_nsegs; i++) {
202981302f1dSMark Johnston seg = &vm_phys_early_segs[i];
203081302f1dSMark Johnston vm_phys_add_seg(seg->start, seg->end);
203181302f1dSMark Johnston }
203281302f1dSMark Johnston vm_phys_early_nsegs = -1;
203381302f1dSMark Johnston
2034b7565d44SJeff Roberson #ifdef NUMA
2035b7565d44SJeff Roberson /* Force phys_avail to be split by domain. */
2036b7565d44SJeff Roberson if (mem_affinity != NULL) {
2037b7565d44SJeff Roberson int idx;
2038b7565d44SJeff Roberson
2039b7565d44SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) {
2040b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].start);
2041e1499bffSOlivier Certner if (idx != -1)
2042b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].start, idx);
2043b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].end);
2044e1499bffSOlivier Certner if (idx != -1)
2045b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].end, idx);
2046b7565d44SJeff Roberson }
2047b7565d44SJeff Roberson }
2048b7565d44SJeff Roberson #endif
2049b7565d44SJeff Roberson }
2050b7565d44SJeff Roberson
205111752d88SAlan Cox #ifdef DDB
205211752d88SAlan Cox /*
205311752d88SAlan Cox * Show the number of physical pages in each of the free lists.
205411752d88SAlan Cox */
2055c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(freepages, db_show_freepages, DB_CMD_MEMSAFE)
205611752d88SAlan Cox {
205711752d88SAlan Cox struct vm_freelist *fl;
20587e226537SAttilio Rao int flind, oind, pind, dom;
205911752d88SAlan Cox
20607e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) {
20617e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom);
206211752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) {
206311752d88SAlan Cox db_printf("FREE LIST %d:\n"
206411752d88SAlan Cox "\n ORDER (SIZE) | NUMBER"
206511752d88SAlan Cox "\n ", flind);
206611752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++)
206711752d88SAlan Cox db_printf(" | POOL %d", pind);
206811752d88SAlan Cox db_printf("\n-- ");
206911752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++)
207011752d88SAlan Cox db_printf("-- -- ");
207111752d88SAlan Cox db_printf("--\n");
207211752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
207311752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind,
207411752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind));
207511752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) {
20767e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind];
207711752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt);
207811752d88SAlan Cox }
207911752d88SAlan Cox db_printf("\n");
208011752d88SAlan Cox }
208111752d88SAlan Cox db_printf("\n");
208211752d88SAlan Cox }
20837e226537SAttilio Rao db_printf("\n");
20847e226537SAttilio Rao }
208511752d88SAlan Cox }
208611752d88SAlan Cox #endif