xref: /freebsd/sys/vm/vm_phys.c (revision eb2f42fbb08ab731f1f1acddbe112153dd0c77e5)
111752d88SAlan Cox /*-
211752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
311752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
411752d88SAlan Cox  * All rights reserved.
511752d88SAlan Cox  *
611752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
711752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
811752d88SAlan Cox  *
911752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1011752d88SAlan Cox  * modification, are permitted provided that the following conditions
1111752d88SAlan Cox  * are met:
1211752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1311752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1411752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1611752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1711752d88SAlan Cox  *
1811752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1911752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2011752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2111752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2211752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2311752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2411752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2511752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2611752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2711752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
2811752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2911752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3011752d88SAlan Cox  */
3111752d88SAlan Cox 
32fbd80bd0SAlan Cox /*
33fbd80bd0SAlan Cox  *	Physical memory system implementation
34fbd80bd0SAlan Cox  *
35fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
36fbd80bd0SAlan Cox  * virtual memory system.
37fbd80bd0SAlan Cox  */
38fbd80bd0SAlan Cox 
3911752d88SAlan Cox #include <sys/cdefs.h>
4011752d88SAlan Cox __FBSDID("$FreeBSD$");
4111752d88SAlan Cox 
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
4711752d88SAlan Cox #include <sys/lock.h>
4811752d88SAlan Cox #include <sys/kernel.h>
4911752d88SAlan Cox #include <sys/malloc.h>
5011752d88SAlan Cox #include <sys/mutex.h>
517e226537SAttilio Rao #if MAXMEMDOM > 1
527e226537SAttilio Rao #include <sys/proc.h>
537e226537SAttilio Rao #endif
5411752d88SAlan Cox #include <sys/queue.h>
5511752d88SAlan Cox #include <sys/sbuf.h>
5611752d88SAlan Cox #include <sys/sysctl.h>
5711752d88SAlan Cox #include <sys/vmmeter.h>
5811752d88SAlan Cox 
5911752d88SAlan Cox #include <ddb/ddb.h>
6011752d88SAlan Cox 
6111752d88SAlan Cox #include <vm/vm.h>
6211752d88SAlan Cox #include <vm/vm_param.h>
6311752d88SAlan Cox #include <vm/vm_kern.h>
6411752d88SAlan Cox #include <vm/vm_object.h>
6511752d88SAlan Cox #include <vm/vm_page.h>
6611752d88SAlan Cox #include <vm/vm_phys.h>
6711752d88SAlan Cox 
68449c2e92SKonstantin Belousov _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
69449c2e92SKonstantin Belousov     "Too many physsegs.");
7011752d88SAlan Cox 
71a3870a18SJohn Baldwin struct mem_affinity *mem_affinity;
72a3870a18SJohn Baldwin 
737e226537SAttilio Rao int vm_ndomains = 1;
747e226537SAttilio Rao 
75449c2e92SKonstantin Belousov struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
76449c2e92SKonstantin Belousov int vm_phys_nsegs;
7711752d88SAlan Cox 
78b6de32bdSKonstantin Belousov #define VM_PHYS_FICTITIOUS_NSEGS	8
79b6de32bdSKonstantin Belousov static struct vm_phys_fictitious_seg {
80b6de32bdSKonstantin Belousov 	vm_paddr_t	start;
81b6de32bdSKonstantin Belousov 	vm_paddr_t	end;
82b6de32bdSKonstantin Belousov 	vm_page_t	first_page;
83b6de32bdSKonstantin Belousov } vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
84b6de32bdSKonstantin Belousov static struct mtx vm_phys_fictitious_reg_mtx;
85c0432fc3SMark Johnston MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
86b6de32bdSKonstantin Belousov 
8711752d88SAlan Cox static struct vm_freelist
887e226537SAttilio Rao     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
8911752d88SAlan Cox 
9011752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
9111752d88SAlan Cox 
9211752d88SAlan Cox static int cnt_prezero;
9311752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
9411752d88SAlan Cox     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
9511752d88SAlan Cox 
9611752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
9711752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
9811752d88SAlan Cox     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
9911752d88SAlan Cox 
10011752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
10111752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
10211752d88SAlan Cox     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
10311752d88SAlan Cox 
1047e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
1057e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
106a3870a18SJohn Baldwin 
107f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
108f5c4b077SJohn Baldwin     int order);
109a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
110a3870a18SJohn Baldwin     int domain);
11111752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
11211752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa);
11311752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
11411752d88SAlan Cox     int order);
11511752d88SAlan Cox 
/*
 * Select the next memory domain in round-robin order for the current
 * thread.  With a single domain this collapses to a constant zero.
 */
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;
	/* Advance this thread's private round-robin cursor and use it. */
	td->td_dom_rr_idx = (td->td_dom_rr_idx + 1) % vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
1317e226537SAttilio Rao 
132449c2e92SKonstantin Belousov boolean_t
133449c2e92SKonstantin Belousov vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
134449c2e92SKonstantin Belousov {
135449c2e92SKonstantin Belousov 	struct vm_phys_seg *s;
136449c2e92SKonstantin Belousov 	int idx;
137449c2e92SKonstantin Belousov 
138449c2e92SKonstantin Belousov 	while ((idx = ffsl(mask)) != 0) {
139449c2e92SKonstantin Belousov 		idx--;	/* ffsl counts from 1 */
140449c2e92SKonstantin Belousov 		mask &= ~(1UL << idx);
141449c2e92SKonstantin Belousov 		s = &vm_phys_segs[idx];
142449c2e92SKonstantin Belousov 		if (low < s->end && high > s->start)
143449c2e92SKonstantin Belousov 			return (TRUE);
144449c2e92SKonstantin Belousov 	}
145449c2e92SKonstantin Belousov 	return (FALSE);
146449c2e92SKonstantin Belousov }
147449c2e92SKonstantin Belousov 
14811752d88SAlan Cox /*
14911752d88SAlan Cox  * Outputs the state of the physical memory allocator, specifically,
15011752d88SAlan Cox  * the amount of physical memory in each free list.
15111752d88SAlan Cox  */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	/* Wire the user buffer so sbuf_finish() cannot fault mid-copy. */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	/* Start with a rough per-domain size estimate; sbuf grows as needed. */
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			/* Table header: one column per free pool. */
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			/* One row per order, largest first; size shown in KB. */
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
19111752d88SAlan Cox 
19211752d88SAlan Cox /*
19311752d88SAlan Cox  * Outputs the set of physical memory segments.
19411752d88SAlan Cox  */
19511752d88SAlan Cox static int
19611752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
19711752d88SAlan Cox {
19811752d88SAlan Cox 	struct sbuf sbuf;
19911752d88SAlan Cox 	struct vm_phys_seg *seg;
20011752d88SAlan Cox 	int error, segind;
20111752d88SAlan Cox 
20200f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
20300f0e671SMatthew D Fleming 	if (error != 0)
20400f0e671SMatthew D Fleming 		return (error);
2054e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
20611752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
20711752d88SAlan Cox 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
20811752d88SAlan Cox 		seg = &vm_phys_segs[segind];
20911752d88SAlan Cox 		sbuf_printf(&sbuf, "start:     %#jx\n",
21011752d88SAlan Cox 		    (uintmax_t)seg->start);
21111752d88SAlan Cox 		sbuf_printf(&sbuf, "end:       %#jx\n",
21211752d88SAlan Cox 		    (uintmax_t)seg->end);
213a3870a18SJohn Baldwin 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
21411752d88SAlan Cox 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
21511752d88SAlan Cox 	}
2164e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
21711752d88SAlan Cox 	sbuf_delete(&sbuf);
21811752d88SAlan Cox 	return (error);
21911752d88SAlan Cox }
22011752d88SAlan Cox 
2217e226537SAttilio Rao static void
2227e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
223a3870a18SJohn Baldwin {
224a3870a18SJohn Baldwin 
2257e226537SAttilio Rao 	m->order = order;
2267e226537SAttilio Rao 	if (tail)
227c325e866SKonstantin Belousov 		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
2287e226537SAttilio Rao 	else
229c325e866SKonstantin Belousov 		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
2307e226537SAttilio Rao 	fl[order].lcnt++;
231a3870a18SJohn Baldwin }
2327e226537SAttilio Rao 
2337e226537SAttilio Rao static void
2347e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
2357e226537SAttilio Rao {
2367e226537SAttilio Rao 
237c325e866SKonstantin Belousov 	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
2387e226537SAttilio Rao 	fl[order].lcnt--;
2397e226537SAttilio Rao 	m->order = VM_NFREEORDER;
240a3870a18SJohn Baldwin }
241a3870a18SJohn Baldwin 
24211752d88SAlan Cox /*
24311752d88SAlan Cox  * Create a physical memory segment.
24411752d88SAlan Cox  */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	/*
	 * In the sparse layout, a segment's vm_page's live at an offset in
	 * vm_page_array equal to the page count of all earlier segments.
	 */
	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	/* Claim the next slot in the segment table. */
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	/* Dense layout: vm_page's are directly indexed by physical address. */
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
	/* Point at the free queues for this (domain, free list) pair. */
	seg->free_queues = &vm_phys_free_queues[domain][flind];
}
27411752d88SAlan Cox 
275a3870a18SJohn Baldwin static void
276a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
277a3870a18SJohn Baldwin {
278a3870a18SJohn Baldwin 	int i;
279a3870a18SJohn Baldwin 
280a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
281a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, end, flind, 0);
282a3870a18SJohn Baldwin 		return;
283a3870a18SJohn Baldwin 	}
284a3870a18SJohn Baldwin 
285a3870a18SJohn Baldwin 	for (i = 0;; i++) {
286a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
287a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
288a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
289a3870a18SJohn Baldwin 			continue;
290a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
291a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
292a3870a18SJohn Baldwin 			    (uintmax_t)start);
293a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
294a3870a18SJohn Baldwin 			_vm_phys_create_seg(start, end, flind,
295a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
296a3870a18SJohn Baldwin 			break;
297a3870a18SJohn Baldwin 		}
298a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
299a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
300a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
301a3870a18SJohn Baldwin 	}
302a3870a18SJohn Baldwin }
303a3870a18SJohn Baldwin 
30411752d88SAlan Cox /*
30511752d88SAlan Cox  * Initialize the physical memory allocator.
30611752d88SAlan Cox  */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int dom, flind, i, oind, pind;

	/*
	 * Create a segment for each phys_avail[] range, splitting ranges
	 * at the ISA DMA (16MB) and high-memory boundaries so each piece
	 * lands on the appropriate free list.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		/* Memory below 16MB is kept on a separate ISA DMA list. */
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			/* Make sure the ISA DMA list is actually scanned. */
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		/* Memory above VM_HIGHMEM_ADDRESS goes on the highmem list. */
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	/* Initialize every free queue in every domain/list/pool/order. */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}
	/* Protects registration of fictitious page ranges. */
	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}
35811752d88SAlan Cox 
35911752d88SAlan Cox /*
36011752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
36111752d88SAlan Cox  */
36211752d88SAlan Cox static __inline void
36311752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
36411752d88SAlan Cox {
36511752d88SAlan Cox 	vm_page_t m_buddy;
36611752d88SAlan Cox 
36711752d88SAlan Cox 	while (oind > order) {
36811752d88SAlan Cox 		oind--;
36911752d88SAlan Cox 		m_buddy = &m[1 << oind];
37011752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
37111752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
37211752d88SAlan Cox 		    m_buddy, m_buddy->order));
3737e226537SAttilio Rao 		vm_freelist_add(fl, m_buddy, oind, 0);
37411752d88SAlan Cox         }
37511752d88SAlan Cox }
37611752d88SAlan Cox 
37711752d88SAlan Cox /*
37811752d88SAlan Cox  * Initialize a physical page and add it to the free lists.
37911752d88SAlan Cox  */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	/* Account for the page globally and find its vm_page structure. */
	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	/* Charge the page to its domain and record its segment. */
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	/* The free queues and free counters require the free-queue lock. */
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}
40511752d88SAlan Cox 
40611752d88SAlan Cox /*
40711752d88SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages
40811752d88SAlan Cox  * from the free lists.
4098941dc44SAlan Cox  *
4108941dc44SAlan Cox  * The free page queues must be locked.
41111752d88SAlan Cox  */
41211752d88SAlan Cox vm_page_t
41311752d88SAlan Cox vm_phys_alloc_pages(int pool, int order)
41411752d88SAlan Cox {
41549ca10d4SJayachandran C. 	vm_page_t m;
4167e226537SAttilio Rao 	int dom, domain, flind;
41749ca10d4SJayachandran C. 
418f5c4b077SJohn Baldwin 	KASSERT(pool < VM_NFREEPOOL,
419f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
420f5c4b077SJohn Baldwin 	KASSERT(order < VM_NFREEORDER,
421f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: order %d is out of range", order));
422f5c4b077SJohn Baldwin 
4237e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4247e226537SAttilio Rao 		domain = vm_rr_selectdomain();
42549ca10d4SJayachandran C. 		for (flind = 0; flind < vm_nfreelists; flind++) {
4267e226537SAttilio Rao 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
4277e226537SAttilio Rao 			    order);
42849ca10d4SJayachandran C. 			if (m != NULL)
42949ca10d4SJayachandran C. 				return (m);
43049ca10d4SJayachandran C. 		}
4317e226537SAttilio Rao 	}
43249ca10d4SJayachandran C. 	return (NULL);
43349ca10d4SJayachandran C. }
43449ca10d4SJayachandran C. 
43549ca10d4SJayachandran C. /*
43649ca10d4SJayachandran C.  * Find and dequeue a free page on the given free list, with the
43749ca10d4SJayachandran C.  * specified pool and order
43849ca10d4SJayachandran C.  */
43949ca10d4SJayachandran C. vm_page_t
44049ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order)
44149ca10d4SJayachandran C. {
44211752d88SAlan Cox 	vm_page_t m;
4437e226537SAttilio Rao 	int dom, domain;
44411752d88SAlan Cox 
44549ca10d4SJayachandran C. 	KASSERT(flind < VM_NFREELIST,
44649ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
44711752d88SAlan Cox 	KASSERT(pool < VM_NFREEPOOL,
44849ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
44911752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
45049ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
451a3870a18SJohn Baldwin 
4527e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
4537e226537SAttilio Rao 		domain = vm_rr_selectdomain();
4547e226537SAttilio Rao 		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
455f5c4b077SJohn Baldwin 		if (m != NULL)
456f5c4b077SJohn Baldwin 			return (m);
4577e226537SAttilio Rao 	}
4587e226537SAttilio Rao 	return (NULL);
459f5c4b077SJohn Baldwin }
460f5c4b077SJohn Baldwin 
/*
 * Try to dequeue a block of at least order "order" from free list
 * "flind" of domain "domain", preferring pool "pool" but falling back
 * to stealing from other pools.  Returns NULL if nothing large enough
 * is free on that list.
 *
 * The free page queues must be locked.
 */
static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	/* First, look for a block in the requested pool itself. */
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* Return any excess buddies to the free list. */
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
50011752d88SAlan Cox 
50111752d88SAlan Cox /*
50211752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
50311752d88SAlan Cox  */
50411752d88SAlan Cox vm_page_t
50511752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
50611752d88SAlan Cox {
50711752d88SAlan Cox 	struct vm_phys_seg *seg;
50811752d88SAlan Cox 	int segind;
50911752d88SAlan Cox 
51011752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
51111752d88SAlan Cox 		seg = &vm_phys_segs[segind];
51211752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
51311752d88SAlan Cox 			return (&seg->first_page[atop(pa - seg->start)]);
51411752d88SAlan Cox 	}
515f06a3a36SAndrew Thompson 	return (NULL);
51611752d88SAlan Cox }
51711752d88SAlan Cox 
518b6de32bdSKonstantin Belousov vm_page_t
519b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
520b6de32bdSKonstantin Belousov {
521b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
522b6de32bdSKonstantin Belousov 	vm_page_t m;
523b6de32bdSKonstantin Belousov 	int segind;
524b6de32bdSKonstantin Belousov 
525b6de32bdSKonstantin Belousov 	m = NULL;
526b6de32bdSKonstantin Belousov 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
527b6de32bdSKonstantin Belousov 		seg = &vm_phys_fictitious_segs[segind];
528b6de32bdSKonstantin Belousov 		if (pa >= seg->start && pa < seg->end) {
529b6de32bdSKonstantin Belousov 			m = &seg->first_page[atop(pa - seg->start)];
530b6de32bdSKonstantin Belousov 			KASSERT((m->flags & PG_FICTITIOUS) != 0,
531b6de32bdSKonstantin Belousov 			    ("%p not fictitious", m));
532b6de32bdSKonstantin Belousov 			break;
533b6de32bdSKonstantin Belousov 		}
534b6de32bdSKonstantin Belousov 	}
535b6de32bdSKonstantin Belousov 	return (m);
536b6de32bdSKonstantin Belousov }
537b6de32bdSKonstantin Belousov 
/*
 * Register [start, end) as a range of fictitious pages with the given
 * memory attribute.  Returns 0 on success or EBUSY if all
 * VM_PHYS_FICTITIOUS_NSEGS slots are already in use.
 */
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	/*
	 * In the dense layout, reuse the statically allocated vm_page's in
	 * vm_page_array when the whole range is covered by it; otherwise
	 * fall through to the malloc path below.
	 */
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	/* Initialize each page as fictitious, managed, and unbusied. */
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		fp[i].oflags &= ~VPO_UNMANAGED;
		fp[i].busy_lock = VPB_UNBUSIED;
	}
	/* Claim the first unused registration slot (start == end == 0). */
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	/* No slot free: release the page array only if we allocated it. */
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}
590b6de32bdSKonstantin Belousov 
/*
 * Unregister the fictitious page range [start, end) previously
 * registered by vm_phys_fictitious_reg_range().  Panics (via KASSERT)
 * if no matching range is found.
 */
void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			/* Release the slot for reuse. */
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			/*
			 * Free the page array only when it was malloc'ed by
			 * the registration path, i.e. not carved out of
			 * vm_page_array (dense layout).
			 */
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering not registered fictitious range"));
}
623b6de32bdSKonstantin Belousov 
62411752d88SAlan Cox /*
62511752d88SAlan Cox  * Find the segment containing the given physical address.
62611752d88SAlan Cox  */
62711752d88SAlan Cox static int
62811752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa)
62911752d88SAlan Cox {
63011752d88SAlan Cox 	struct vm_phys_seg *seg;
63111752d88SAlan Cox 	int segind;
63211752d88SAlan Cox 
63311752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
63411752d88SAlan Cox 		seg = &vm_phys_segs[segind];
63511752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
63611752d88SAlan Cox 			return (segind);
63711752d88SAlan Cox 	}
63811752d88SAlan Cox 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
63911752d88SAlan Cox 	    (uintmax_t)pa);
64011752d88SAlan Cox }
64111752d88SAlan Cox 
64211752d88SAlan Cox /*
64311752d88SAlan Cox  * Free a contiguous, power of two-sized set of physical pages.
6448941dc44SAlan Cox  *
6458941dc44SAlan Cox  * The free page queues must be locked.
64611752d88SAlan Cox  */
64711752d88SAlan Cox void
64811752d88SAlan Cox vm_phys_free_pages(vm_page_t m, int order)
64911752d88SAlan Cox {
65011752d88SAlan Cox 	struct vm_freelist *fl;
65111752d88SAlan Cox 	struct vm_phys_seg *seg;
6525c1f2cc4SAlan Cox 	vm_paddr_t pa;
65311752d88SAlan Cox 	vm_page_t m_buddy;
65411752d88SAlan Cox 
65511752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
6568941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected order %d",
65711752d88SAlan Cox 	    m, m->order));
65811752d88SAlan Cox 	KASSERT(m->pool < VM_NFREEPOOL,
6598941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
66011752d88SAlan Cox 	    m, m->pool));
66111752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
6628941dc44SAlan Cox 	    ("vm_phys_free_pages: order %d is out of range", order));
66311752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
66411752d88SAlan Cox 	seg = &vm_phys_segs[m->segind];
6655c1f2cc4SAlan Cox 	if (order < VM_NFREEORDER - 1) {
6665c1f2cc4SAlan Cox 		pa = VM_PAGE_TO_PHYS(m);
6675c1f2cc4SAlan Cox 		do {
6685c1f2cc4SAlan Cox 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
6695c1f2cc4SAlan Cox 			if (pa < seg->start || pa >= seg->end)
67011752d88SAlan Cox 				break;
6715c1f2cc4SAlan Cox 			m_buddy = &seg->first_page[atop(pa - seg->start)];
67211752d88SAlan Cox 			if (m_buddy->order != order)
67311752d88SAlan Cox 				break;
67411752d88SAlan Cox 			fl = (*seg->free_queues)[m_buddy->pool];
6757e226537SAttilio Rao 			vm_freelist_rem(fl, m_buddy, order);
67611752d88SAlan Cox 			if (m_buddy->pool != m->pool)
67711752d88SAlan Cox 				vm_phys_set_pool(m->pool, m_buddy, order);
67811752d88SAlan Cox 			order++;
6795c1f2cc4SAlan Cox 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
68011752d88SAlan Cox 			m = &seg->first_page[atop(pa - seg->start)];
6815c1f2cc4SAlan Cox 		} while (order < VM_NFREEORDER - 1);
68211752d88SAlan Cox 	}
68311752d88SAlan Cox 	fl = (*seg->free_queues)[m->pool];
6847e226537SAttilio Rao 	vm_freelist_add(fl, m, order, 1);
68511752d88SAlan Cox }
68611752d88SAlan Cox 
68711752d88SAlan Cox /*
6885c1f2cc4SAlan Cox  * Free a contiguous, arbitrarily sized set of physical pages.
6895c1f2cc4SAlan Cox  *
6905c1f2cc4SAlan Cox  * The free page queues must be locked.
6915c1f2cc4SAlan Cox  */
6925c1f2cc4SAlan Cox void
6935c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages)
6945c1f2cc4SAlan Cox {
6955c1f2cc4SAlan Cox 	u_int n;
6965c1f2cc4SAlan Cox 	int order;
6975c1f2cc4SAlan Cox 
6985c1f2cc4SAlan Cox 	/*
6995c1f2cc4SAlan Cox 	 * Avoid unnecessary coalescing by freeing the pages in the largest
7005c1f2cc4SAlan Cox 	 * possible power-of-two-sized subsets.
7015c1f2cc4SAlan Cox 	 */
7025c1f2cc4SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7035c1f2cc4SAlan Cox 	for (;; npages -= n) {
7045c1f2cc4SAlan Cox 		/*
7055c1f2cc4SAlan Cox 		 * Unsigned "min" is used here so that "order" is assigned
7065c1f2cc4SAlan Cox 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
7075c1f2cc4SAlan Cox 		 * or the low-order bits of its physical address are zero
7085c1f2cc4SAlan Cox 		 * because the size of a physical address exceeds the size of
7095c1f2cc4SAlan Cox 		 * a long.
7105c1f2cc4SAlan Cox 		 */
7115c1f2cc4SAlan Cox 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
7125c1f2cc4SAlan Cox 		    VM_NFREEORDER - 1);
7135c1f2cc4SAlan Cox 		n = 1 << order;
7145c1f2cc4SAlan Cox 		if (npages < n)
7155c1f2cc4SAlan Cox 			break;
7165c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7175c1f2cc4SAlan Cox 		m += n;
7185c1f2cc4SAlan Cox 	}
7195c1f2cc4SAlan Cox 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
7205c1f2cc4SAlan Cox 	for (; npages > 0; npages -= n) {
7215c1f2cc4SAlan Cox 		order = flsl(npages) - 1;
7225c1f2cc4SAlan Cox 		n = 1 << order;
7235c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7245c1f2cc4SAlan Cox 		m += n;
7255c1f2cc4SAlan Cox 	}
7265c1f2cc4SAlan Cox }
7275c1f2cc4SAlan Cox 
7285c1f2cc4SAlan Cox /*
72911752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
73011752d88SAlan Cox  */
7317bfda801SAlan Cox void
73211752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
73311752d88SAlan Cox {
73411752d88SAlan Cox 	vm_page_t m_tmp;
73511752d88SAlan Cox 
73611752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
73711752d88SAlan Cox 		m_tmp->pool = pool;
73811752d88SAlan Cox }
73911752d88SAlan Cox 
74011752d88SAlan Cox /*
7419742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
7429742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
7439742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
7447bfda801SAlan Cox  *
7457bfda801SAlan Cox  * The free page queues must be locked.
7467bfda801SAlan Cox  */
747e35395ceSAlan Cox boolean_t
7487bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
7497bfda801SAlan Cox {
7507bfda801SAlan Cox 	struct vm_freelist *fl;
7517bfda801SAlan Cox 	struct vm_phys_seg *seg;
7527bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
7537bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
7547bfda801SAlan Cox 	int order;
7557bfda801SAlan Cox 
7567bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7577bfda801SAlan Cox 
7587bfda801SAlan Cox 	/*
7597bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
7607bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
7617bfda801SAlan Cox 	 * assign it to "m_set".
7627bfda801SAlan Cox 	 */
7637bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
7647bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
765bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
7667bfda801SAlan Cox 		order++;
7677bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
7682fbced65SAlan Cox 		if (pa >= seg->start)
7697bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
770e35395ceSAlan Cox 		else
771e35395ceSAlan Cox 			return (FALSE);
7727bfda801SAlan Cox 	}
773e35395ceSAlan Cox 	if (m_set->order < order)
774e35395ceSAlan Cox 		return (FALSE);
775e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
776e35395ceSAlan Cox 		return (FALSE);
7777bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
7787bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
7797bfda801SAlan Cox 	    m_set, m_set->order));
7807bfda801SAlan Cox 
7817bfda801SAlan Cox 	/*
7827bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
7837bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
7847bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
7857bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
7867bfda801SAlan Cox 	 */
7877bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
7887bfda801SAlan Cox 	order = m_set->order;
7897e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
7907bfda801SAlan Cox 	while (order > 0) {
7917bfda801SAlan Cox 		order--;
7927bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
7937bfda801SAlan Cox 		if (m->phys_addr < pa_half)
7947bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
7957bfda801SAlan Cox 		else {
7967bfda801SAlan Cox 			m_tmp = m_set;
7977bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
7987bfda801SAlan Cox 		}
7997e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
8007bfda801SAlan Cox 	}
8017bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
802e35395ceSAlan Cox 	return (TRUE);
8037bfda801SAlan Cox }
8047bfda801SAlan Cox 
8057bfda801SAlan Cox /*
8067bfda801SAlan Cox  * Try to zero one physical page.  Used by an idle priority thread.
80711752d88SAlan Cox  */
80811752d88SAlan Cox boolean_t
80911752d88SAlan Cox vm_phys_zero_pages_idle(void)
81011752d88SAlan Cox {
8117e226537SAttilio Rao 	static struct vm_freelist *fl;
8127bfda801SAlan Cox 	static int flind, oind, pind;
81311752d88SAlan Cox 	vm_page_t m, m_tmp;
8147e226537SAttilio Rao 	int domain;
81511752d88SAlan Cox 
8167e226537SAttilio Rao 	domain = vm_rr_selectdomain();
8177e226537SAttilio Rao 	fl = vm_phys_free_queues[domain][0][0];
81811752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
8197bfda801SAlan Cox 	for (;;) {
820c325e866SKonstantin Belousov 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
8217bfda801SAlan Cox 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
8227bfda801SAlan Cox 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
8237bfda801SAlan Cox 					vm_phys_unfree_page(m_tmp);
824449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, -1);
82511752d88SAlan Cox 					mtx_unlock(&vm_page_queue_free_mtx);
82611752d88SAlan Cox 					pmap_zero_page_idle(m_tmp);
82711752d88SAlan Cox 					m_tmp->flags |= PG_ZERO;
82811752d88SAlan Cox 					mtx_lock(&vm_page_queue_free_mtx);
829449c2e92SKonstantin Belousov 					vm_phys_freecnt_adj(m, 1);
8307bfda801SAlan Cox 					vm_phys_free_pages(m_tmp, 0);
8317bfda801SAlan Cox 					vm_page_zero_count++;
8327bfda801SAlan Cox 					cnt_prezero++;
83311752d88SAlan Cox 					return (TRUE);
83411752d88SAlan Cox 				}
83511752d88SAlan Cox 			}
83611752d88SAlan Cox 		}
8377bfda801SAlan Cox 		oind++;
8387bfda801SAlan Cox 		if (oind == VM_NFREEORDER) {
8397bfda801SAlan Cox 			oind = 0;
8407bfda801SAlan Cox 			pind++;
8417bfda801SAlan Cox 			if (pind == VM_NFREEPOOL) {
8427bfda801SAlan Cox 				pind = 0;
8437bfda801SAlan Cox 				flind++;
8447bfda801SAlan Cox 				if (flind == vm_nfreelists)
8457bfda801SAlan Cox 					flind = 0;
8467bfda801SAlan Cox 			}
8477e226537SAttilio Rao 			fl = vm_phys_free_queues[domain][flind][pind];
8487bfda801SAlan Cox 		}
8497bfda801SAlan Cox 	}
85011752d88SAlan Cox }
85111752d88SAlan Cox 
85211752d88SAlan Cox /*
8532f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
8542f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
8552f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
8562f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
8572f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
8582f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
8592f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
86011752d88SAlan Cox  * "alignment" and "boundary" must be a power of two.
86111752d88SAlan Cox  */
86211752d88SAlan Cox vm_page_t
8635c1f2cc4SAlan Cox vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
8645c1f2cc4SAlan Cox     u_long alignment, vm_paddr_t boundary)
86511752d88SAlan Cox {
86611752d88SAlan Cox 	struct vm_freelist *fl;
86711752d88SAlan Cox 	struct vm_phys_seg *seg;
86811752d88SAlan Cox 	vm_paddr_t pa, pa_last, size;
869fbd80bd0SAlan Cox 	vm_page_t m, m_ret;
8705c1f2cc4SAlan Cox 	u_long npages_end;
8717e226537SAttilio Rao 	int dom, domain, flind, oind, order, pind;
87211752d88SAlan Cox 
873fbd80bd0SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
87411752d88SAlan Cox 	size = npages << PAGE_SHIFT;
87511752d88SAlan Cox 	KASSERT(size != 0,
87611752d88SAlan Cox 	    ("vm_phys_alloc_contig: size must not be 0"));
87711752d88SAlan Cox 	KASSERT((alignment & (alignment - 1)) == 0,
87811752d88SAlan Cox 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
87911752d88SAlan Cox 	KASSERT((boundary & (boundary - 1)) == 0,
88011752d88SAlan Cox 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
88111752d88SAlan Cox 	/* Compute the queue that is the best fit for npages. */
88211752d88SAlan Cox 	for (order = 0; (1 << order) < npages; order++);
8837e226537SAttilio Rao 	dom = 0;
8847e226537SAttilio Rao restartdom:
8857e226537SAttilio Rao 	domain = vm_rr_selectdomain();
88611752d88SAlan Cox 	for (flind = 0; flind < vm_nfreelists; flind++) {
88711752d88SAlan Cox 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
88811752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
8897e226537SAttilio Rao 				fl = &vm_phys_free_queues[domain][flind][pind][0];
890c325e866SKonstantin Belousov 				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
89111752d88SAlan Cox 					/*
89211752d88SAlan Cox 					 * A free list may contain physical pages
89311752d88SAlan Cox 					 * from one or more segments.
89411752d88SAlan Cox 					 */
89511752d88SAlan Cox 					seg = &vm_phys_segs[m_ret->segind];
89611752d88SAlan Cox 					if (seg->start > high ||
89711752d88SAlan Cox 					    low >= seg->end)
89811752d88SAlan Cox 						continue;
89911752d88SAlan Cox 
90011752d88SAlan Cox 					/*
90111752d88SAlan Cox 					 * Is the size of this allocation request
90211752d88SAlan Cox 					 * larger than the largest block size?
90311752d88SAlan Cox 					 */
90411752d88SAlan Cox 					if (order >= VM_NFREEORDER) {
90511752d88SAlan Cox 						/*
90611752d88SAlan Cox 						 * Determine if a sufficient number
90711752d88SAlan Cox 						 * of subsequent blocks to satisfy
90811752d88SAlan Cox 						 * the allocation request are free.
90911752d88SAlan Cox 						 */
91011752d88SAlan Cox 						pa = VM_PAGE_TO_PHYS(m_ret);
91111752d88SAlan Cox 						pa_last = pa + size;
91211752d88SAlan Cox 						for (;;) {
91311752d88SAlan Cox 							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
91411752d88SAlan Cox 							if (pa >= pa_last)
91511752d88SAlan Cox 								break;
91611752d88SAlan Cox 							if (pa < seg->start ||
91711752d88SAlan Cox 							    pa >= seg->end)
91811752d88SAlan Cox 								break;
91911752d88SAlan Cox 							m = &seg->first_page[atop(pa - seg->start)];
92011752d88SAlan Cox 							if (m->order != VM_NFREEORDER - 1)
92111752d88SAlan Cox 								break;
92211752d88SAlan Cox 						}
92311752d88SAlan Cox 						/* If not, continue to the next block. */
92411752d88SAlan Cox 						if (pa < pa_last)
92511752d88SAlan Cox 							continue;
92611752d88SAlan Cox 					}
92711752d88SAlan Cox 
92811752d88SAlan Cox 					/*
92911752d88SAlan Cox 					 * Determine if the blocks are within the given range,
93011752d88SAlan Cox 					 * satisfy the given alignment, and do not cross the
93111752d88SAlan Cox 					 * given boundary.
93211752d88SAlan Cox 					 */
93311752d88SAlan Cox 					pa = VM_PAGE_TO_PHYS(m_ret);
93411752d88SAlan Cox 					if (pa >= low &&
93511752d88SAlan Cox 					    pa + size <= high &&
93611752d88SAlan Cox 					    (pa & (alignment - 1)) == 0 &&
93711752d88SAlan Cox 					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
93811752d88SAlan Cox 						goto done;
93911752d88SAlan Cox 				}
94011752d88SAlan Cox 			}
94111752d88SAlan Cox 		}
94211752d88SAlan Cox 	}
9437e226537SAttilio Rao 	if (++dom < vm_ndomains)
9447e226537SAttilio Rao 		goto restartdom;
94511752d88SAlan Cox 	return (NULL);
94611752d88SAlan Cox done:
94711752d88SAlan Cox 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
94811752d88SAlan Cox 		fl = (*seg->free_queues)[m->pool];
9497e226537SAttilio Rao 		vm_freelist_rem(fl, m, m->order);
95011752d88SAlan Cox 	}
95111752d88SAlan Cox 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
95211752d88SAlan Cox 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
95311752d88SAlan Cox 	fl = (*seg->free_queues)[m_ret->pool];
95411752d88SAlan Cox 	vm_phys_split_pages(m_ret, oind, fl, order);
9555c1f2cc4SAlan Cox 	/* Return excess pages to the free lists. */
9565c1f2cc4SAlan Cox 	npages_end = roundup2(npages, 1 << imin(oind, order));
9575c1f2cc4SAlan Cox 	if (npages < npages_end)
9585c1f2cc4SAlan Cox 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
95911752d88SAlan Cox 	return (m_ret);
96011752d88SAlan Cox }
96111752d88SAlan Cox 
96211752d88SAlan Cox #ifdef DDB
96311752d88SAlan Cox /*
96411752d88SAlan Cox  * Show the number of physical pages in each of the free lists.
96511752d88SAlan Cox  */
96611752d88SAlan Cox DB_SHOW_COMMAND(freepages, db_show_freepages)
96711752d88SAlan Cox {
96811752d88SAlan Cox 	struct vm_freelist *fl;
9697e226537SAttilio Rao 	int flind, oind, pind, dom;
97011752d88SAlan Cox 
9717e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
9727e226537SAttilio Rao 		db_printf("DOMAIN: %d\n", dom);
97311752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
97411752d88SAlan Cox 			db_printf("FREE LIST %d:\n"
97511752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
97611752d88SAlan Cox 			    "\n              ", flind);
97711752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
97811752d88SAlan Cox 				db_printf("  |  POOL %d", pind);
97911752d88SAlan Cox 			db_printf("\n--            ");
98011752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
98111752d88SAlan Cox 				db_printf("-- --      ");
98211752d88SAlan Cox 			db_printf("--\n");
98311752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
98411752d88SAlan Cox 				db_printf("  %2.2d (%6.6dK)", oind,
98511752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
98611752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
9877e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
98811752d88SAlan Cox 					db_printf("  |  %6.6d", fl[oind].lcnt);
98911752d88SAlan Cox 				}
99011752d88SAlan Cox 				db_printf("\n");
99111752d88SAlan Cox 			}
99211752d88SAlan Cox 			db_printf("\n");
99311752d88SAlan Cox 		}
9947e226537SAttilio Rao 		db_printf("\n");
9957e226537SAttilio Rao 	}
99611752d88SAlan Cox }
99711752d88SAlan Cox #endif
998