xref: /freebsd/sys/vm/vm_phys.c (revision 7e226537c7fd554676f97b437f7fb04b79db33b5)
111752d88SAlan Cox /*-
211752d88SAlan Cox  * Copyright (c) 2002-2006 Rice University
311752d88SAlan Cox  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
411752d88SAlan Cox  * All rights reserved.
511752d88SAlan Cox  *
611752d88SAlan Cox  * This software was developed for the FreeBSD Project by Alan L. Cox,
711752d88SAlan Cox  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
811752d88SAlan Cox  *
911752d88SAlan Cox  * Redistribution and use in source and binary forms, with or without
1011752d88SAlan Cox  * modification, are permitted provided that the following conditions
1111752d88SAlan Cox  * are met:
1211752d88SAlan Cox  * 1. Redistributions of source code must retain the above copyright
1311752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer.
1411752d88SAlan Cox  * 2. Redistributions in binary form must reproduce the above copyright
1511752d88SAlan Cox  *    notice, this list of conditions and the following disclaimer in the
1611752d88SAlan Cox  *    documentation and/or other materials provided with the distribution.
1711752d88SAlan Cox  *
1811752d88SAlan Cox  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1911752d88SAlan Cox  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2011752d88SAlan Cox  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2111752d88SAlan Cox  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
2211752d88SAlan Cox  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2311752d88SAlan Cox  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
2411752d88SAlan Cox  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
2511752d88SAlan Cox  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2611752d88SAlan Cox  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2711752d88SAlan Cox  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
2811752d88SAlan Cox  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2911752d88SAlan Cox  * POSSIBILITY OF SUCH DAMAGE.
3011752d88SAlan Cox  */
3111752d88SAlan Cox 
32fbd80bd0SAlan Cox /*
33fbd80bd0SAlan Cox  *	Physical memory system implementation
34fbd80bd0SAlan Cox  *
35fbd80bd0SAlan Cox  * Any external functions defined by this module are only to be used by the
36fbd80bd0SAlan Cox  * virtual memory system.
37fbd80bd0SAlan Cox  */
38fbd80bd0SAlan Cox 
3911752d88SAlan Cox #include <sys/cdefs.h>
4011752d88SAlan Cox __FBSDID("$FreeBSD$");
4111752d88SAlan Cox 
4211752d88SAlan Cox #include "opt_ddb.h"
43174b5f38SJohn Baldwin #include "opt_vm.h"
4411752d88SAlan Cox 
4511752d88SAlan Cox #include <sys/param.h>
4611752d88SAlan Cox #include <sys/systm.h>
4711752d88SAlan Cox #include <sys/lock.h>
4811752d88SAlan Cox #include <sys/kernel.h>
4911752d88SAlan Cox #include <sys/malloc.h>
5011752d88SAlan Cox #include <sys/mutex.h>
51*7e226537SAttilio Rao #if MAXMEMDOM > 1
52*7e226537SAttilio Rao #include <sys/proc.h>
53*7e226537SAttilio Rao #endif
5411752d88SAlan Cox #include <sys/queue.h>
5511752d88SAlan Cox #include <sys/sbuf.h>
5611752d88SAlan Cox #include <sys/sysctl.h>
5711752d88SAlan Cox #include <sys/vmmeter.h>
5811752d88SAlan Cox 
5911752d88SAlan Cox #include <ddb/ddb.h>
6011752d88SAlan Cox 
6111752d88SAlan Cox #include <vm/vm.h>
6211752d88SAlan Cox #include <vm/vm_param.h>
6311752d88SAlan Cox #include <vm/vm_kern.h>
6411752d88SAlan Cox #include <vm/vm_object.h>
6511752d88SAlan Cox #include <vm/vm_page.h>
6611752d88SAlan Cox #include <vm/vm_phys.h>
6711752d88SAlan Cox 
6811752d88SAlan Cox struct vm_freelist {
6911752d88SAlan Cox 	struct pglist pl;
7011752d88SAlan Cox 	int lcnt;
7111752d88SAlan Cox };
7211752d88SAlan Cox 
7311752d88SAlan Cox struct vm_phys_seg {
7411752d88SAlan Cox 	vm_paddr_t	start;
7511752d88SAlan Cox 	vm_paddr_t	end;
7611752d88SAlan Cox 	vm_page_t	first_page;
77a3870a18SJohn Baldwin 	int		domain;
7811752d88SAlan Cox 	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
7911752d88SAlan Cox };
8011752d88SAlan Cox 
81a3870a18SJohn Baldwin struct mem_affinity *mem_affinity;
82a3870a18SJohn Baldwin 
83*7e226537SAttilio Rao int vm_ndomains = 1;
84*7e226537SAttilio Rao 
8511752d88SAlan Cox static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
8611752d88SAlan Cox 
8711752d88SAlan Cox static int vm_phys_nsegs;
8811752d88SAlan Cox 
89b6de32bdSKonstantin Belousov #define VM_PHYS_FICTITIOUS_NSEGS	8
90b6de32bdSKonstantin Belousov static struct vm_phys_fictitious_seg {
91b6de32bdSKonstantin Belousov 	vm_paddr_t	start;
92b6de32bdSKonstantin Belousov 	vm_paddr_t	end;
93b6de32bdSKonstantin Belousov 	vm_page_t	first_page;
94b6de32bdSKonstantin Belousov } vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
95b6de32bdSKonstantin Belousov static struct mtx vm_phys_fictitious_reg_mtx;
96b6de32bdSKonstantin Belousov MALLOC_DEFINE(M_FICT_PAGES, "", "");
97b6de32bdSKonstantin Belousov 
9811752d88SAlan Cox static struct vm_freelist
99*7e226537SAttilio Rao     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
10011752d88SAlan Cox 
10111752d88SAlan Cox static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
10211752d88SAlan Cox 
10311752d88SAlan Cox static int cnt_prezero;
10411752d88SAlan Cox SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
10511752d88SAlan Cox     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
10611752d88SAlan Cox 
10711752d88SAlan Cox static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
10811752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
10911752d88SAlan Cox     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
11011752d88SAlan Cox 
11111752d88SAlan Cox static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
11211752d88SAlan Cox SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
11311752d88SAlan Cox     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
11411752d88SAlan Cox 
115*7e226537SAttilio Rao SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
116*7e226537SAttilio Rao     &vm_ndomains, 0, "Number of physical memory domains available.");
117a3870a18SJohn Baldwin 
118f5c4b077SJohn Baldwin static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
119f5c4b077SJohn Baldwin     int order);
120a3870a18SJohn Baldwin static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
121a3870a18SJohn Baldwin     int domain);
12211752d88SAlan Cox static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
12311752d88SAlan Cox static int vm_phys_paddr_to_segind(vm_paddr_t pa);
12411752d88SAlan Cox static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
12511752d88SAlan Cox     int order);
12611752d88SAlan Cox 
/*
 * Select the next memory domain for the current thread in round-robin
 * order.  When the kernel is built for a single memory domain the answer
 * is always domain 0.
 */
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *self;

	/* Advance the per-thread cursor, wrapping it at vm_ndomains. */
	self = curthread;
	self->td_dom_rr_idx++;
	self->td_dom_rr_idx %= vm_ndomains;
	return (self->td_dom_rr_idx);
#else
	return (0);
#endif
}
142*7e226537SAttilio Rao 
14311752d88SAlan Cox /*
14411752d88SAlan Cox  * Outputs the state of the physical memory allocator, specifically,
14511752d88SAlan Cox  * the amount of physical memory in each free list.
14611752d88SAlan Cox  */
14711752d88SAlan Cox static int
14811752d88SAlan Cox sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
14911752d88SAlan Cox {
15011752d88SAlan Cox 	struct sbuf sbuf;
15111752d88SAlan Cox 	struct vm_freelist *fl;
152*7e226537SAttilio Rao 	int dom, error, flind, oind, pind;
15311752d88SAlan Cox 
15400f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
15500f0e671SMatthew D Fleming 	if (error != 0)
15600f0e671SMatthew D Fleming 		return (error);
157*7e226537SAttilio Rao 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
158*7e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
159*7e226537SAttilio Rao 		sbuf_printf(&sbuf,"DOMAIN: %d\n", dom);
16011752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
161*7e226537SAttilio Rao 			sbuf_printf(&sbuf, "FREE LIST %d:\n"
16211752d88SAlan Cox 			    "\n  ORDER (SIZE)  |  NUMBER"
16311752d88SAlan Cox 			    "\n              ", flind);
16411752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
16511752d88SAlan Cox 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
16611752d88SAlan Cox 			sbuf_printf(&sbuf, "\n--            ");
16711752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
16811752d88SAlan Cox 				sbuf_printf(&sbuf, "-- --      ");
16911752d88SAlan Cox 			sbuf_printf(&sbuf, "--\n");
17011752d88SAlan Cox 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
171d689bc00SAlan Cox 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
17211752d88SAlan Cox 				    1 << (PAGE_SHIFT - 10 + oind));
17311752d88SAlan Cox 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
174*7e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
175*7e226537SAttilio Rao 					sbuf_printf(&sbuf, "  |  %6.6d",
176*7e226537SAttilio Rao 					    fl[oind].lcnt);
17711752d88SAlan Cox 				}
17811752d88SAlan Cox 				sbuf_printf(&sbuf, "\n");
17911752d88SAlan Cox 			}
180*7e226537SAttilio Rao 			sbuf_printf(&sbuf, "\n");
181*7e226537SAttilio Rao 		}
182*7e226537SAttilio Rao 		sbuf_printf(&sbuf, "\n");
18311752d88SAlan Cox 	}
1844e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
18511752d88SAlan Cox 	sbuf_delete(&sbuf);
18611752d88SAlan Cox 	return (error);
18711752d88SAlan Cox }
18811752d88SAlan Cox 
18911752d88SAlan Cox /*
19011752d88SAlan Cox  * Outputs the set of physical memory segments.
19111752d88SAlan Cox  */
19211752d88SAlan Cox static int
19311752d88SAlan Cox sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
19411752d88SAlan Cox {
19511752d88SAlan Cox 	struct sbuf sbuf;
19611752d88SAlan Cox 	struct vm_phys_seg *seg;
19711752d88SAlan Cox 	int error, segind;
19811752d88SAlan Cox 
19900f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
20000f0e671SMatthew D Fleming 	if (error != 0)
20100f0e671SMatthew D Fleming 		return (error);
2024e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
20311752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
20411752d88SAlan Cox 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
20511752d88SAlan Cox 		seg = &vm_phys_segs[segind];
20611752d88SAlan Cox 		sbuf_printf(&sbuf, "start:     %#jx\n",
20711752d88SAlan Cox 		    (uintmax_t)seg->start);
20811752d88SAlan Cox 		sbuf_printf(&sbuf, "end:       %#jx\n",
20911752d88SAlan Cox 		    (uintmax_t)seg->end);
210a3870a18SJohn Baldwin 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
21111752d88SAlan Cox 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
21211752d88SAlan Cox 	}
2134e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
21411752d88SAlan Cox 	sbuf_delete(&sbuf);
21511752d88SAlan Cox 	return (error);
21611752d88SAlan Cox }
21711752d88SAlan Cox 
218*7e226537SAttilio Rao static void
219*7e226537SAttilio Rao vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
220a3870a18SJohn Baldwin {
221a3870a18SJohn Baldwin 
222*7e226537SAttilio Rao 	m->order = order;
223*7e226537SAttilio Rao 	if (tail)
224*7e226537SAttilio Rao 		TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
225*7e226537SAttilio Rao 	else
226*7e226537SAttilio Rao 		TAILQ_INSERT_HEAD(&fl[order].pl, m, pageq);
227*7e226537SAttilio Rao 	fl[order].lcnt++;
228a3870a18SJohn Baldwin }
229*7e226537SAttilio Rao 
230*7e226537SAttilio Rao static void
231*7e226537SAttilio Rao vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
232*7e226537SAttilio Rao {
233*7e226537SAttilio Rao 
234*7e226537SAttilio Rao 	TAILQ_REMOVE(&fl[order].pl, m, pageq);
235*7e226537SAttilio Rao 	fl[order].lcnt--;
236*7e226537SAttilio Rao 	m->order = VM_NFREEORDER;
237a3870a18SJohn Baldwin }
238a3870a18SJohn Baldwin 
23911752d88SAlan Cox /*
24011752d88SAlan Cox  * Create a physical memory segment.
24111752d88SAlan Cox  */
24211752d88SAlan Cox static void
243a3870a18SJohn Baldwin _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
24411752d88SAlan Cox {
24511752d88SAlan Cox 	struct vm_phys_seg *seg;
24611752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE
247d6e9b97bSJohn Baldwin 	long pages;
24811752d88SAlan Cox 	int segind;
24911752d88SAlan Cox 
25011752d88SAlan Cox 	pages = 0;
25111752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
25211752d88SAlan Cox 		seg = &vm_phys_segs[segind];
25311752d88SAlan Cox 		pages += atop(seg->end - seg->start);
25411752d88SAlan Cox 	}
25511752d88SAlan Cox #endif
25611752d88SAlan Cox 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
25711752d88SAlan Cox 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
258*7e226537SAttilio Rao 	KASSERT(domain < vm_ndomains,
259*7e226537SAttilio Rao 	    ("vm_phys_create_seg: invalid domain provided"));
26011752d88SAlan Cox 	seg = &vm_phys_segs[vm_phys_nsegs++];
26111752d88SAlan Cox 	seg->start = start;
26211752d88SAlan Cox 	seg->end = end;
263a3870a18SJohn Baldwin 	seg->domain = domain;
26411752d88SAlan Cox #ifdef VM_PHYSSEG_SPARSE
26511752d88SAlan Cox 	seg->first_page = &vm_page_array[pages];
26611752d88SAlan Cox #else
26711752d88SAlan Cox 	seg->first_page = PHYS_TO_VM_PAGE(start);
26811752d88SAlan Cox #endif
269*7e226537SAttilio Rao 	seg->free_queues = &vm_phys_free_queues[domain][flind];
27011752d88SAlan Cox }
27111752d88SAlan Cox 
272a3870a18SJohn Baldwin static void
273a3870a18SJohn Baldwin vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
274a3870a18SJohn Baldwin {
275a3870a18SJohn Baldwin 	int i;
276a3870a18SJohn Baldwin 
277a3870a18SJohn Baldwin 	if (mem_affinity == NULL) {
278a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, end, flind, 0);
279a3870a18SJohn Baldwin 		return;
280a3870a18SJohn Baldwin 	}
281a3870a18SJohn Baldwin 
282a3870a18SJohn Baldwin 	for (i = 0;; i++) {
283a3870a18SJohn Baldwin 		if (mem_affinity[i].end == 0)
284a3870a18SJohn Baldwin 			panic("Reached end of affinity info");
285a3870a18SJohn Baldwin 		if (mem_affinity[i].end <= start)
286a3870a18SJohn Baldwin 			continue;
287a3870a18SJohn Baldwin 		if (mem_affinity[i].start > start)
288a3870a18SJohn Baldwin 			panic("No affinity info for start %jx",
289a3870a18SJohn Baldwin 			    (uintmax_t)start);
290a3870a18SJohn Baldwin 		if (mem_affinity[i].end >= end) {
291a3870a18SJohn Baldwin 			_vm_phys_create_seg(start, end, flind,
292a3870a18SJohn Baldwin 			    mem_affinity[i].domain);
293a3870a18SJohn Baldwin 			break;
294a3870a18SJohn Baldwin 		}
295a3870a18SJohn Baldwin 		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
296a3870a18SJohn Baldwin 		    mem_affinity[i].domain);
297a3870a18SJohn Baldwin 		start = mem_affinity[i].end;
298a3870a18SJohn Baldwin 	}
299a3870a18SJohn Baldwin }
300a3870a18SJohn Baldwin 
30111752d88SAlan Cox /*
30211752d88SAlan Cox  * Initialize the physical memory allocator.
30311752d88SAlan Cox  */
30411752d88SAlan Cox void
30511752d88SAlan Cox vm_phys_init(void)
30611752d88SAlan Cox {
30711752d88SAlan Cox 	struct vm_freelist *fl;
308*7e226537SAttilio Rao 	int dom, flind, i, oind, pind;
30911752d88SAlan Cox 
31011752d88SAlan Cox 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
31111752d88SAlan Cox #ifdef	VM_FREELIST_ISADMA
31211752d88SAlan Cox 		if (phys_avail[i] < 16777216) {
31311752d88SAlan Cox 			if (phys_avail[i + 1] > 16777216) {
31411752d88SAlan Cox 				vm_phys_create_seg(phys_avail[i], 16777216,
31511752d88SAlan Cox 				    VM_FREELIST_ISADMA);
31611752d88SAlan Cox 				vm_phys_create_seg(16777216, phys_avail[i + 1],
31711752d88SAlan Cox 				    VM_FREELIST_DEFAULT);
31811752d88SAlan Cox 			} else {
31911752d88SAlan Cox 				vm_phys_create_seg(phys_avail[i],
32011752d88SAlan Cox 				    phys_avail[i + 1], VM_FREELIST_ISADMA);
32111752d88SAlan Cox 			}
32211752d88SAlan Cox 			if (VM_FREELIST_ISADMA >= vm_nfreelists)
32311752d88SAlan Cox 				vm_nfreelists = VM_FREELIST_ISADMA + 1;
32411752d88SAlan Cox 		} else
32511752d88SAlan Cox #endif
32611752d88SAlan Cox #ifdef	VM_FREELIST_HIGHMEM
32711752d88SAlan Cox 		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
32811752d88SAlan Cox 			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
32911752d88SAlan Cox 				vm_phys_create_seg(phys_avail[i],
33011752d88SAlan Cox 				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
33111752d88SAlan Cox 				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
33211752d88SAlan Cox 				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
33311752d88SAlan Cox 			} else {
33411752d88SAlan Cox 				vm_phys_create_seg(phys_avail[i],
33511752d88SAlan Cox 				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
33611752d88SAlan Cox 			}
33711752d88SAlan Cox 			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
33811752d88SAlan Cox 				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
33911752d88SAlan Cox 		} else
34011752d88SAlan Cox #endif
34111752d88SAlan Cox 		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
34211752d88SAlan Cox 		    VM_FREELIST_DEFAULT);
34311752d88SAlan Cox 	}
344*7e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
34511752d88SAlan Cox 		for (flind = 0; flind < vm_nfreelists; flind++) {
34611752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
347*7e226537SAttilio Rao 				fl = vm_phys_free_queues[dom][flind][pind];
34811752d88SAlan Cox 				for (oind = 0; oind < VM_NFREEORDER; oind++)
34911752d88SAlan Cox 					TAILQ_INIT(&fl[oind].pl);
35011752d88SAlan Cox 			}
35111752d88SAlan Cox 		}
352a3870a18SJohn Baldwin 	}
353b6de32bdSKonstantin Belousov 	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
35411752d88SAlan Cox }
35511752d88SAlan Cox 
35611752d88SAlan Cox /*
35711752d88SAlan Cox  * Split a contiguous, power of two-sized set of physical pages.
35811752d88SAlan Cox  */
35911752d88SAlan Cox static __inline void
36011752d88SAlan Cox vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
36111752d88SAlan Cox {
36211752d88SAlan Cox 	vm_page_t m_buddy;
36311752d88SAlan Cox 
36411752d88SAlan Cox 	while (oind > order) {
36511752d88SAlan Cox 		oind--;
36611752d88SAlan Cox 		m_buddy = &m[1 << oind];
36711752d88SAlan Cox 		KASSERT(m_buddy->order == VM_NFREEORDER,
36811752d88SAlan Cox 		    ("vm_phys_split_pages: page %p has unexpected order %d",
36911752d88SAlan Cox 		    m_buddy, m_buddy->order));
370*7e226537SAttilio Rao 		vm_freelist_add(fl, m_buddy, oind, 0);
37111752d88SAlan Cox         }
37211752d88SAlan Cox }
37311752d88SAlan Cox 
37411752d88SAlan Cox /*
37511752d88SAlan Cox  * Initialize a physical page and add it to the free lists.
37611752d88SAlan Cox  */
37711752d88SAlan Cox void
37811752d88SAlan Cox vm_phys_add_page(vm_paddr_t pa)
37911752d88SAlan Cox {
38011752d88SAlan Cox 	vm_page_t m;
38111752d88SAlan Cox 
38211752d88SAlan Cox 	cnt.v_page_count++;
38311752d88SAlan Cox 	m = vm_phys_paddr_to_vm_page(pa);
38411752d88SAlan Cox 	m->phys_addr = pa;
38544e46b9eSAlan Cox 	m->queue = PQ_NONE;
38611752d88SAlan Cox 	m->segind = vm_phys_paddr_to_segind(pa);
38711752d88SAlan Cox 	m->flags = PG_FREE;
38811752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
38911752d88SAlan Cox 	    ("vm_phys_add_page: page %p has unexpected order %d",
39011752d88SAlan Cox 	    m, m->order));
39111752d88SAlan Cox 	m->pool = VM_FREEPOOL_DEFAULT;
39211752d88SAlan Cox 	pmap_page_init(m);
3938941dc44SAlan Cox 	mtx_lock(&vm_page_queue_free_mtx);
3947bfda801SAlan Cox 	cnt.v_free_count++;
39511752d88SAlan Cox 	vm_phys_free_pages(m, 0);
3968941dc44SAlan Cox 	mtx_unlock(&vm_page_queue_free_mtx);
39711752d88SAlan Cox }
39811752d88SAlan Cox 
39911752d88SAlan Cox /*
40011752d88SAlan Cox  * Allocate a contiguous, power of two-sized set of physical pages
40111752d88SAlan Cox  * from the free lists.
4028941dc44SAlan Cox  *
4038941dc44SAlan Cox  * The free page queues must be locked.
40411752d88SAlan Cox  */
40511752d88SAlan Cox vm_page_t
40611752d88SAlan Cox vm_phys_alloc_pages(int pool, int order)
40711752d88SAlan Cox {
40849ca10d4SJayachandran C. 	vm_page_t m;
409*7e226537SAttilio Rao 	int dom, domain, flind;
41049ca10d4SJayachandran C. 
411f5c4b077SJohn Baldwin 	KASSERT(pool < VM_NFREEPOOL,
412f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
413f5c4b077SJohn Baldwin 	KASSERT(order < VM_NFREEORDER,
414f5c4b077SJohn Baldwin 	    ("vm_phys_alloc_pages: order %d is out of range", order));
415f5c4b077SJohn Baldwin 
416*7e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
417*7e226537SAttilio Rao 		domain = vm_rr_selectdomain();
41849ca10d4SJayachandran C. 		for (flind = 0; flind < vm_nfreelists; flind++) {
419*7e226537SAttilio Rao 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
420*7e226537SAttilio Rao 			    order);
42149ca10d4SJayachandran C. 			if (m != NULL)
42249ca10d4SJayachandran C. 				return (m);
42349ca10d4SJayachandran C. 		}
424*7e226537SAttilio Rao 	}
42549ca10d4SJayachandran C. 	return (NULL);
42649ca10d4SJayachandran C. }
42749ca10d4SJayachandran C. 
42849ca10d4SJayachandran C. /*
42949ca10d4SJayachandran C.  * Find and dequeue a free page on the given free list, with the
43049ca10d4SJayachandran C.  * specified pool and order
43149ca10d4SJayachandran C.  */
43249ca10d4SJayachandran C. vm_page_t
43349ca10d4SJayachandran C. vm_phys_alloc_freelist_pages(int flind, int pool, int order)
43449ca10d4SJayachandran C. {
43511752d88SAlan Cox 	vm_page_t m;
436*7e226537SAttilio Rao 	int dom, domain;
43711752d88SAlan Cox 
43849ca10d4SJayachandran C. 	KASSERT(flind < VM_NFREELIST,
43949ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
44011752d88SAlan Cox 	KASSERT(pool < VM_NFREEPOOL,
44149ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
44211752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
44349ca10d4SJayachandran C. 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
444a3870a18SJohn Baldwin 
445*7e226537SAttilio Rao 	for (dom = 0; dom < vm_ndomains; dom++) {
446*7e226537SAttilio Rao 		domain = vm_rr_selectdomain();
447*7e226537SAttilio Rao 		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
448f5c4b077SJohn Baldwin 		if (m != NULL)
449f5c4b077SJohn Baldwin 			return (m);
450*7e226537SAttilio Rao 	}
451*7e226537SAttilio Rao 	return (NULL);
452f5c4b077SJohn Baldwin }
453f5c4b077SJohn Baldwin 
454f5c4b077SJohn Baldwin static vm_page_t
455f5c4b077SJohn Baldwin vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
456f5c4b077SJohn Baldwin {
457f5c4b077SJohn Baldwin 	struct vm_freelist *fl;
458f5c4b077SJohn Baldwin 	struct vm_freelist *alt;
459f5c4b077SJohn Baldwin 	int oind, pind;
460f5c4b077SJohn Baldwin 	vm_page_t m;
461f5c4b077SJohn Baldwin 
46211752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
463*7e226537SAttilio Rao 	fl = &vm_phys_free_queues[domain][flind][pool][0];
46411752d88SAlan Cox 	for (oind = order; oind < VM_NFREEORDER; oind++) {
46511752d88SAlan Cox 		m = TAILQ_FIRST(&fl[oind].pl);
46611752d88SAlan Cox 		if (m != NULL) {
467*7e226537SAttilio Rao 			vm_freelist_rem(fl, m, oind);
46811752d88SAlan Cox 			vm_phys_split_pages(m, oind, fl, order);
46911752d88SAlan Cox 			return (m);
47011752d88SAlan Cox 		}
47111752d88SAlan Cox 	}
47211752d88SAlan Cox 
47311752d88SAlan Cox 	/*
47411752d88SAlan Cox 	 * The given pool was empty.  Find the largest
47511752d88SAlan Cox 	 * contiguous, power-of-two-sized set of pages in any
47611752d88SAlan Cox 	 * pool.  Transfer these pages to the given pool, and
47711752d88SAlan Cox 	 * use them to satisfy the allocation.
47811752d88SAlan Cox 	 */
47911752d88SAlan Cox 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
48011752d88SAlan Cox 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
481*7e226537SAttilio Rao 			alt = &vm_phys_free_queues[domain][flind][pind][0];
48211752d88SAlan Cox 			m = TAILQ_FIRST(&alt[oind].pl);
48311752d88SAlan Cox 			if (m != NULL) {
484*7e226537SAttilio Rao 				vm_freelist_rem(alt, m, oind);
48511752d88SAlan Cox 				vm_phys_set_pool(pool, m, oind);
48611752d88SAlan Cox 				vm_phys_split_pages(m, oind, fl, order);
48711752d88SAlan Cox 				return (m);
48811752d88SAlan Cox 			}
48911752d88SAlan Cox 		}
49011752d88SAlan Cox 	}
49111752d88SAlan Cox 	return (NULL);
49211752d88SAlan Cox }
49311752d88SAlan Cox 
49411752d88SAlan Cox /*
49511752d88SAlan Cox  * Find the vm_page corresponding to the given physical address.
49611752d88SAlan Cox  */
49711752d88SAlan Cox vm_page_t
49811752d88SAlan Cox vm_phys_paddr_to_vm_page(vm_paddr_t pa)
49911752d88SAlan Cox {
50011752d88SAlan Cox 	struct vm_phys_seg *seg;
50111752d88SAlan Cox 	int segind;
50211752d88SAlan Cox 
50311752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
50411752d88SAlan Cox 		seg = &vm_phys_segs[segind];
50511752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
50611752d88SAlan Cox 			return (&seg->first_page[atop(pa - seg->start)]);
50711752d88SAlan Cox 	}
508f06a3a36SAndrew Thompson 	return (NULL);
50911752d88SAlan Cox }
51011752d88SAlan Cox 
511b6de32bdSKonstantin Belousov vm_page_t
512b6de32bdSKonstantin Belousov vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
513b6de32bdSKonstantin Belousov {
514b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
515b6de32bdSKonstantin Belousov 	vm_page_t m;
516b6de32bdSKonstantin Belousov 	int segind;
517b6de32bdSKonstantin Belousov 
518b6de32bdSKonstantin Belousov 	m = NULL;
519b6de32bdSKonstantin Belousov 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
520b6de32bdSKonstantin Belousov 		seg = &vm_phys_fictitious_segs[segind];
521b6de32bdSKonstantin Belousov 		if (pa >= seg->start && pa < seg->end) {
522b6de32bdSKonstantin Belousov 			m = &seg->first_page[atop(pa - seg->start)];
523b6de32bdSKonstantin Belousov 			KASSERT((m->flags & PG_FICTITIOUS) != 0,
524b6de32bdSKonstantin Belousov 			    ("%p not fictitious", m));
525b6de32bdSKonstantin Belousov 			break;
526b6de32bdSKonstantin Belousov 		}
527b6de32bdSKonstantin Belousov 	}
528b6de32bdSKonstantin Belousov 	return (m);
529b6de32bdSKonstantin Belousov }
530b6de32bdSKonstantin Belousov 
531b6de32bdSKonstantin Belousov int
532b6de32bdSKonstantin Belousov vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
533b6de32bdSKonstantin Belousov     vm_memattr_t memattr)
534b6de32bdSKonstantin Belousov {
535b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
536b6de32bdSKonstantin Belousov 	vm_page_t fp;
537b6de32bdSKonstantin Belousov 	long i, page_count;
538b6de32bdSKonstantin Belousov 	int segind;
539b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
540b6de32bdSKonstantin Belousov 	long pi;
541b6de32bdSKonstantin Belousov 	boolean_t malloced;
542b6de32bdSKonstantin Belousov #endif
543b6de32bdSKonstantin Belousov 
544b6de32bdSKonstantin Belousov 	page_count = (end - start) / PAGE_SIZE;
545b6de32bdSKonstantin Belousov 
546b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
547b6de32bdSKonstantin Belousov 	pi = atop(start);
548b6de32bdSKonstantin Belousov 	if (pi >= first_page && atop(end) < vm_page_array_size) {
549b6de32bdSKonstantin Belousov 		fp = &vm_page_array[pi - first_page];
550b6de32bdSKonstantin Belousov 		malloced = FALSE;
551b6de32bdSKonstantin Belousov 	} else
552b6de32bdSKonstantin Belousov #endif
553b6de32bdSKonstantin Belousov 	{
554b6de32bdSKonstantin Belousov 		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
555b6de32bdSKonstantin Belousov 		    M_WAITOK | M_ZERO);
556b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
557b6de32bdSKonstantin Belousov 		malloced = TRUE;
558b6de32bdSKonstantin Belousov #endif
559b6de32bdSKonstantin Belousov 	}
560b6de32bdSKonstantin Belousov 	for (i = 0; i < page_count; i++) {
561b6de32bdSKonstantin Belousov 		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
562b6de32bdSKonstantin Belousov 		pmap_page_init(&fp[i]);
563b6de32bdSKonstantin Belousov 		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
564b6de32bdSKonstantin Belousov 	}
565b6de32bdSKonstantin Belousov 	mtx_lock(&vm_phys_fictitious_reg_mtx);
566b6de32bdSKonstantin Belousov 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
567b6de32bdSKonstantin Belousov 		seg = &vm_phys_fictitious_segs[segind];
568b6de32bdSKonstantin Belousov 		if (seg->start == 0 && seg->end == 0) {
569b6de32bdSKonstantin Belousov 			seg->start = start;
570b6de32bdSKonstantin Belousov 			seg->end = end;
571b6de32bdSKonstantin Belousov 			seg->first_page = fp;
572b6de32bdSKonstantin Belousov 			mtx_unlock(&vm_phys_fictitious_reg_mtx);
573b6de32bdSKonstantin Belousov 			return (0);
574b6de32bdSKonstantin Belousov 		}
575b6de32bdSKonstantin Belousov 	}
576b6de32bdSKonstantin Belousov 	mtx_unlock(&vm_phys_fictitious_reg_mtx);
577b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
578b6de32bdSKonstantin Belousov 	if (malloced)
579b6de32bdSKonstantin Belousov #endif
580b6de32bdSKonstantin Belousov 		free(fp, M_FICT_PAGES);
581b6de32bdSKonstantin Belousov 	return (EBUSY);
582b6de32bdSKonstantin Belousov }
583b6de32bdSKonstantin Belousov 
584b6de32bdSKonstantin Belousov void
585b6de32bdSKonstantin Belousov vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
586b6de32bdSKonstantin Belousov {
587b6de32bdSKonstantin Belousov 	struct vm_phys_fictitious_seg *seg;
588b6de32bdSKonstantin Belousov 	vm_page_t fp;
589b6de32bdSKonstantin Belousov 	int segind;
590b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
591b6de32bdSKonstantin Belousov 	long pi;
592b6de32bdSKonstantin Belousov #endif
593b6de32bdSKonstantin Belousov 
594b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
595b6de32bdSKonstantin Belousov 	pi = atop(start);
596b6de32bdSKonstantin Belousov #endif
597b6de32bdSKonstantin Belousov 
598b6de32bdSKonstantin Belousov 	mtx_lock(&vm_phys_fictitious_reg_mtx);
599b6de32bdSKonstantin Belousov 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
600b6de32bdSKonstantin Belousov 		seg = &vm_phys_fictitious_segs[segind];
601b6de32bdSKonstantin Belousov 		if (seg->start == start && seg->end == end) {
602b6de32bdSKonstantin Belousov 			seg->start = seg->end = 0;
603b6de32bdSKonstantin Belousov 			fp = seg->first_page;
604b6de32bdSKonstantin Belousov 			seg->first_page = NULL;
605b6de32bdSKonstantin Belousov 			mtx_unlock(&vm_phys_fictitious_reg_mtx);
606b6de32bdSKonstantin Belousov #ifdef VM_PHYSSEG_DENSE
607b6de32bdSKonstantin Belousov 			if (pi < first_page || atop(end) >= vm_page_array_size)
608b6de32bdSKonstantin Belousov #endif
609b6de32bdSKonstantin Belousov 				free(fp, M_FICT_PAGES);
610b6de32bdSKonstantin Belousov 			return;
611b6de32bdSKonstantin Belousov 		}
612b6de32bdSKonstantin Belousov 	}
613b6de32bdSKonstantin Belousov 	mtx_unlock(&vm_phys_fictitious_reg_mtx);
614b6de32bdSKonstantin Belousov 	KASSERT(0, ("Unregistering not registered fictitious range"));
615b6de32bdSKonstantin Belousov }
616b6de32bdSKonstantin Belousov 
61711752d88SAlan Cox /*
61811752d88SAlan Cox  * Find the segment containing the given physical address.
61911752d88SAlan Cox  */
62011752d88SAlan Cox static int
62111752d88SAlan Cox vm_phys_paddr_to_segind(vm_paddr_t pa)
62211752d88SAlan Cox {
62311752d88SAlan Cox 	struct vm_phys_seg *seg;
62411752d88SAlan Cox 	int segind;
62511752d88SAlan Cox 
62611752d88SAlan Cox 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
62711752d88SAlan Cox 		seg = &vm_phys_segs[segind];
62811752d88SAlan Cox 		if (pa >= seg->start && pa < seg->end)
62911752d88SAlan Cox 			return (segind);
63011752d88SAlan Cox 	}
63111752d88SAlan Cox 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
63211752d88SAlan Cox 	    (uintmax_t)pa);
63311752d88SAlan Cox }
63411752d88SAlan Cox 
63511752d88SAlan Cox /*
63611752d88SAlan Cox  * Free a contiguous, power of two-sized set of physical pages.
6378941dc44SAlan Cox  *
6388941dc44SAlan Cox  * The free page queues must be locked.
63911752d88SAlan Cox  */
64011752d88SAlan Cox void
64111752d88SAlan Cox vm_phys_free_pages(vm_page_t m, int order)
64211752d88SAlan Cox {
64311752d88SAlan Cox 	struct vm_freelist *fl;
64411752d88SAlan Cox 	struct vm_phys_seg *seg;
6455c1f2cc4SAlan Cox 	vm_paddr_t pa;
64611752d88SAlan Cox 	vm_page_t m_buddy;
64711752d88SAlan Cox 
64811752d88SAlan Cox 	KASSERT(m->order == VM_NFREEORDER,
6498941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected order %d",
65011752d88SAlan Cox 	    m, m->order));
65111752d88SAlan Cox 	KASSERT(m->pool < VM_NFREEPOOL,
6528941dc44SAlan Cox 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
65311752d88SAlan Cox 	    m, m->pool));
65411752d88SAlan Cox 	KASSERT(order < VM_NFREEORDER,
6558941dc44SAlan Cox 	    ("vm_phys_free_pages: order %d is out of range", order));
65611752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
65711752d88SAlan Cox 	seg = &vm_phys_segs[m->segind];
6585c1f2cc4SAlan Cox 	if (order < VM_NFREEORDER - 1) {
6595c1f2cc4SAlan Cox 		pa = VM_PAGE_TO_PHYS(m);
6605c1f2cc4SAlan Cox 		do {
6615c1f2cc4SAlan Cox 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
6625c1f2cc4SAlan Cox 			if (pa < seg->start || pa >= seg->end)
66311752d88SAlan Cox 				break;
6645c1f2cc4SAlan Cox 			m_buddy = &seg->first_page[atop(pa - seg->start)];
66511752d88SAlan Cox 			if (m_buddy->order != order)
66611752d88SAlan Cox 				break;
66711752d88SAlan Cox 			fl = (*seg->free_queues)[m_buddy->pool];
668*7e226537SAttilio Rao 			vm_freelist_rem(fl, m_buddy, order);
66911752d88SAlan Cox 			if (m_buddy->pool != m->pool)
67011752d88SAlan Cox 				vm_phys_set_pool(m->pool, m_buddy, order);
67111752d88SAlan Cox 			order++;
6725c1f2cc4SAlan Cox 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
67311752d88SAlan Cox 			m = &seg->first_page[atop(pa - seg->start)];
6745c1f2cc4SAlan Cox 		} while (order < VM_NFREEORDER - 1);
67511752d88SAlan Cox 	}
67611752d88SAlan Cox 	fl = (*seg->free_queues)[m->pool];
677*7e226537SAttilio Rao 	vm_freelist_add(fl, m, order, 1);
67811752d88SAlan Cox }
67911752d88SAlan Cox 
68011752d88SAlan Cox /*
6815c1f2cc4SAlan Cox  * Free a contiguous, arbitrarily sized set of physical pages.
6825c1f2cc4SAlan Cox  *
6835c1f2cc4SAlan Cox  * The free page queues must be locked.
6845c1f2cc4SAlan Cox  */
6855c1f2cc4SAlan Cox void
6865c1f2cc4SAlan Cox vm_phys_free_contig(vm_page_t m, u_long npages)
6875c1f2cc4SAlan Cox {
6885c1f2cc4SAlan Cox 	u_int n;
6895c1f2cc4SAlan Cox 	int order;
6905c1f2cc4SAlan Cox 
6915c1f2cc4SAlan Cox 	/*
6925c1f2cc4SAlan Cox 	 * Avoid unnecessary coalescing by freeing the pages in the largest
6935c1f2cc4SAlan Cox 	 * possible power-of-two-sized subsets.
6945c1f2cc4SAlan Cox 	 */
6955c1f2cc4SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
6965c1f2cc4SAlan Cox 	for (;; npages -= n) {
6975c1f2cc4SAlan Cox 		/*
6985c1f2cc4SAlan Cox 		 * Unsigned "min" is used here so that "order" is assigned
6995c1f2cc4SAlan Cox 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
7005c1f2cc4SAlan Cox 		 * or the low-order bits of its physical address are zero
7015c1f2cc4SAlan Cox 		 * because the size of a physical address exceeds the size of
7025c1f2cc4SAlan Cox 		 * a long.
7035c1f2cc4SAlan Cox 		 */
7045c1f2cc4SAlan Cox 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
7055c1f2cc4SAlan Cox 		    VM_NFREEORDER - 1);
7065c1f2cc4SAlan Cox 		n = 1 << order;
7075c1f2cc4SAlan Cox 		if (npages < n)
7085c1f2cc4SAlan Cox 			break;
7095c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7105c1f2cc4SAlan Cox 		m += n;
7115c1f2cc4SAlan Cox 	}
7125c1f2cc4SAlan Cox 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
7135c1f2cc4SAlan Cox 	for (; npages > 0; npages -= n) {
7145c1f2cc4SAlan Cox 		order = flsl(npages) - 1;
7155c1f2cc4SAlan Cox 		n = 1 << order;
7165c1f2cc4SAlan Cox 		vm_phys_free_pages(m, order);
7175c1f2cc4SAlan Cox 		m += n;
7185c1f2cc4SAlan Cox 	}
7195c1f2cc4SAlan Cox }
7205c1f2cc4SAlan Cox 
7215c1f2cc4SAlan Cox /*
72211752d88SAlan Cox  * Set the pool for a contiguous, power of two-sized set of physical pages.
72311752d88SAlan Cox  */
7247bfda801SAlan Cox void
72511752d88SAlan Cox vm_phys_set_pool(int pool, vm_page_t m, int order)
72611752d88SAlan Cox {
72711752d88SAlan Cox 	vm_page_t m_tmp;
72811752d88SAlan Cox 
72911752d88SAlan Cox 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
73011752d88SAlan Cox 		m_tmp->pool = pool;
73111752d88SAlan Cox }
73211752d88SAlan Cox 
73311752d88SAlan Cox /*
7349742373aSAlan Cox  * Search for the given physical page "m" in the free lists.  If the search
7359742373aSAlan Cox  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
7369742373aSAlan Cox  * FALSE, indicating that "m" is not in the free lists.
7377bfda801SAlan Cox  *
7387bfda801SAlan Cox  * The free page queues must be locked.
7397bfda801SAlan Cox  */
740e35395ceSAlan Cox boolean_t
7417bfda801SAlan Cox vm_phys_unfree_page(vm_page_t m)
7427bfda801SAlan Cox {
7437bfda801SAlan Cox 	struct vm_freelist *fl;
7447bfda801SAlan Cox 	struct vm_phys_seg *seg;
7457bfda801SAlan Cox 	vm_paddr_t pa, pa_half;
7467bfda801SAlan Cox 	vm_page_t m_set, m_tmp;
7477bfda801SAlan Cox 	int order;
7487bfda801SAlan Cox 
7497bfda801SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
7507bfda801SAlan Cox 
7517bfda801SAlan Cox 	/*
7527bfda801SAlan Cox 	 * First, find the contiguous, power of two-sized set of free
7537bfda801SAlan Cox 	 * physical pages containing the given physical page "m" and
7547bfda801SAlan Cox 	 * assign it to "m_set".
7557bfda801SAlan Cox 	 */
7567bfda801SAlan Cox 	seg = &vm_phys_segs[m->segind];
7577bfda801SAlan Cox 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
758bc8794a1SAlan Cox 	    order < VM_NFREEORDER - 1; ) {
7597bfda801SAlan Cox 		order++;
7607bfda801SAlan Cox 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
7612fbced65SAlan Cox 		if (pa >= seg->start)
7627bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa - seg->start)];
763e35395ceSAlan Cox 		else
764e35395ceSAlan Cox 			return (FALSE);
7657bfda801SAlan Cox 	}
766e35395ceSAlan Cox 	if (m_set->order < order)
767e35395ceSAlan Cox 		return (FALSE);
768e35395ceSAlan Cox 	if (m_set->order == VM_NFREEORDER)
769e35395ceSAlan Cox 		return (FALSE);
7707bfda801SAlan Cox 	KASSERT(m_set->order < VM_NFREEORDER,
7717bfda801SAlan Cox 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
7727bfda801SAlan Cox 	    m_set, m_set->order));
7737bfda801SAlan Cox 
7747bfda801SAlan Cox 	/*
7757bfda801SAlan Cox 	 * Next, remove "m_set" from the free lists.  Finally, extract
7767bfda801SAlan Cox 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
7777bfda801SAlan Cox 	 * is larger than a page, shrink "m_set" by returning the half
7787bfda801SAlan Cox 	 * of "m_set" that does not contain "m" to the free lists.
7797bfda801SAlan Cox 	 */
7807bfda801SAlan Cox 	fl = (*seg->free_queues)[m_set->pool];
7817bfda801SAlan Cox 	order = m_set->order;
782*7e226537SAttilio Rao 	vm_freelist_rem(fl, m_set, order);
7837bfda801SAlan Cox 	while (order > 0) {
7847bfda801SAlan Cox 		order--;
7857bfda801SAlan Cox 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
7867bfda801SAlan Cox 		if (m->phys_addr < pa_half)
7877bfda801SAlan Cox 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
7887bfda801SAlan Cox 		else {
7897bfda801SAlan Cox 			m_tmp = m_set;
7907bfda801SAlan Cox 			m_set = &seg->first_page[atop(pa_half - seg->start)];
7917bfda801SAlan Cox 		}
792*7e226537SAttilio Rao 		vm_freelist_add(fl, m_tmp, order, 0);
7937bfda801SAlan Cox 	}
7947bfda801SAlan Cox 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
795e35395ceSAlan Cox 	return (TRUE);
7967bfda801SAlan Cox }
7977bfda801SAlan Cox 
7987bfda801SAlan Cox /*
7997bfda801SAlan Cox  * Try to zero one physical page.  Used by an idle priority thread.
80011752d88SAlan Cox  */
80111752d88SAlan Cox boolean_t
80211752d88SAlan Cox vm_phys_zero_pages_idle(void)
80311752d88SAlan Cox {
804*7e226537SAttilio Rao 	static struct vm_freelist *fl;
8057bfda801SAlan Cox 	static int flind, oind, pind;
80611752d88SAlan Cox 	vm_page_t m, m_tmp;
807*7e226537SAttilio Rao 	int domain;
80811752d88SAlan Cox 
809*7e226537SAttilio Rao 	domain = vm_rr_selectdomain();
810*7e226537SAttilio Rao 	fl = vm_phys_free_queues[domain][0][0];
81111752d88SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
8127bfda801SAlan Cox 	for (;;) {
8137bfda801SAlan Cox 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
8147bfda801SAlan Cox 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
8157bfda801SAlan Cox 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
8167bfda801SAlan Cox 					vm_phys_unfree_page(m_tmp);
8177bfda801SAlan Cox 					cnt.v_free_count--;
81811752d88SAlan Cox 					mtx_unlock(&vm_page_queue_free_mtx);
81911752d88SAlan Cox 					pmap_zero_page_idle(m_tmp);
82011752d88SAlan Cox 					m_tmp->flags |= PG_ZERO;
82111752d88SAlan Cox 					mtx_lock(&vm_page_queue_free_mtx);
8227bfda801SAlan Cox 					cnt.v_free_count++;
8237bfda801SAlan Cox 					vm_phys_free_pages(m_tmp, 0);
8247bfda801SAlan Cox 					vm_page_zero_count++;
8257bfda801SAlan Cox 					cnt_prezero++;
82611752d88SAlan Cox 					return (TRUE);
82711752d88SAlan Cox 				}
82811752d88SAlan Cox 			}
82911752d88SAlan Cox 		}
8307bfda801SAlan Cox 		oind++;
8317bfda801SAlan Cox 		if (oind == VM_NFREEORDER) {
8327bfda801SAlan Cox 			oind = 0;
8337bfda801SAlan Cox 			pind++;
8347bfda801SAlan Cox 			if (pind == VM_NFREEPOOL) {
8357bfda801SAlan Cox 				pind = 0;
8367bfda801SAlan Cox 				flind++;
8377bfda801SAlan Cox 				if (flind == vm_nfreelists)
8387bfda801SAlan Cox 					flind = 0;
8397bfda801SAlan Cox 			}
840*7e226537SAttilio Rao 			fl = vm_phys_free_queues[domain][flind][pind];
8417bfda801SAlan Cox 		}
8427bfda801SAlan Cox 	}
84311752d88SAlan Cox }
84411752d88SAlan Cox 
84511752d88SAlan Cox /*
8462f9f48d6SAlan Cox  * Allocate a contiguous set of physical pages of the given size
8472f9f48d6SAlan Cox  * "npages" from the free lists.  All of the physical pages must be at
8482f9f48d6SAlan Cox  * or above the given physical address "low" and below the given
8492f9f48d6SAlan Cox  * physical address "high".  The given value "alignment" determines the
8502f9f48d6SAlan Cox  * alignment of the first physical page in the set.  If the given value
8512f9f48d6SAlan Cox  * "boundary" is non-zero, then the set of physical pages cannot cross
8522f9f48d6SAlan Cox  * any physical address boundary that is a multiple of that value.  Both
85311752d88SAlan Cox  * "alignment" and "boundary" must be a power of two.
85411752d88SAlan Cox  */
85511752d88SAlan Cox vm_page_t
8565c1f2cc4SAlan Cox vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
8575c1f2cc4SAlan Cox     u_long alignment, vm_paddr_t boundary)
85811752d88SAlan Cox {
85911752d88SAlan Cox 	struct vm_freelist *fl;
86011752d88SAlan Cox 	struct vm_phys_seg *seg;
86111752d88SAlan Cox 	vm_paddr_t pa, pa_last, size;
862fbd80bd0SAlan Cox 	vm_page_t m, m_ret;
8635c1f2cc4SAlan Cox 	u_long npages_end;
864*7e226537SAttilio Rao 	int dom, domain, flind, oind, order, pind;
86511752d88SAlan Cox 
866fbd80bd0SAlan Cox 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
86711752d88SAlan Cox 	size = npages << PAGE_SHIFT;
86811752d88SAlan Cox 	KASSERT(size != 0,
86911752d88SAlan Cox 	    ("vm_phys_alloc_contig: size must not be 0"));
87011752d88SAlan Cox 	KASSERT((alignment & (alignment - 1)) == 0,
87111752d88SAlan Cox 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
87211752d88SAlan Cox 	KASSERT((boundary & (boundary - 1)) == 0,
87311752d88SAlan Cox 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
87411752d88SAlan Cox 	/* Compute the queue that is the best fit for npages. */
87511752d88SAlan Cox 	for (order = 0; (1 << order) < npages; order++);
876*7e226537SAttilio Rao 	dom = 0;
877*7e226537SAttilio Rao restartdom:
878*7e226537SAttilio Rao 	domain = vm_rr_selectdomain();
87911752d88SAlan Cox 	for (flind = 0; flind < vm_nfreelists; flind++) {
88011752d88SAlan Cox 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
88111752d88SAlan Cox 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
882*7e226537SAttilio Rao 				fl = &vm_phys_free_queues[domain][flind][pind][0];
88311752d88SAlan Cox 				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
88411752d88SAlan Cox 					/*
88511752d88SAlan Cox 					 * A free list may contain physical pages
88611752d88SAlan Cox 					 * from one or more segments.
88711752d88SAlan Cox 					 */
88811752d88SAlan Cox 					seg = &vm_phys_segs[m_ret->segind];
88911752d88SAlan Cox 					if (seg->start > high ||
89011752d88SAlan Cox 					    low >= seg->end)
89111752d88SAlan Cox 						continue;
89211752d88SAlan Cox 
89311752d88SAlan Cox 					/*
89411752d88SAlan Cox 					 * Is the size of this allocation request
89511752d88SAlan Cox 					 * larger than the largest block size?
89611752d88SAlan Cox 					 */
89711752d88SAlan Cox 					if (order >= VM_NFREEORDER) {
89811752d88SAlan Cox 						/*
89911752d88SAlan Cox 						 * Determine if a sufficient number
90011752d88SAlan Cox 						 * of subsequent blocks to satisfy
90111752d88SAlan Cox 						 * the allocation request are free.
90211752d88SAlan Cox 						 */
90311752d88SAlan Cox 						pa = VM_PAGE_TO_PHYS(m_ret);
90411752d88SAlan Cox 						pa_last = pa + size;
90511752d88SAlan Cox 						for (;;) {
90611752d88SAlan Cox 							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
90711752d88SAlan Cox 							if (pa >= pa_last)
90811752d88SAlan Cox 								break;
90911752d88SAlan Cox 							if (pa < seg->start ||
91011752d88SAlan Cox 							    pa >= seg->end)
91111752d88SAlan Cox 								break;
91211752d88SAlan Cox 							m = &seg->first_page[atop(pa - seg->start)];
91311752d88SAlan Cox 							if (m->order != VM_NFREEORDER - 1)
91411752d88SAlan Cox 								break;
91511752d88SAlan Cox 						}
91611752d88SAlan Cox 						/* If not, continue to the next block. */
91711752d88SAlan Cox 						if (pa < pa_last)
91811752d88SAlan Cox 							continue;
91911752d88SAlan Cox 					}
92011752d88SAlan Cox 
92111752d88SAlan Cox 					/*
92211752d88SAlan Cox 					 * Determine if the blocks are within the given range,
92311752d88SAlan Cox 					 * satisfy the given alignment, and do not cross the
92411752d88SAlan Cox 					 * given boundary.
92511752d88SAlan Cox 					 */
92611752d88SAlan Cox 					pa = VM_PAGE_TO_PHYS(m_ret);
92711752d88SAlan Cox 					if (pa >= low &&
92811752d88SAlan Cox 					    pa + size <= high &&
92911752d88SAlan Cox 					    (pa & (alignment - 1)) == 0 &&
93011752d88SAlan Cox 					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
93111752d88SAlan Cox 						goto done;
93211752d88SAlan Cox 				}
93311752d88SAlan Cox 			}
93411752d88SAlan Cox 		}
93511752d88SAlan Cox 	}
936*7e226537SAttilio Rao 	if (++dom < vm_ndomains)
937*7e226537SAttilio Rao 		goto restartdom;
93811752d88SAlan Cox 	return (NULL);
93911752d88SAlan Cox done:
94011752d88SAlan Cox 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
94111752d88SAlan Cox 		fl = (*seg->free_queues)[m->pool];
942*7e226537SAttilio Rao 		vm_freelist_rem(fl, m, m->order);
94311752d88SAlan Cox 	}
94411752d88SAlan Cox 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
94511752d88SAlan Cox 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
94611752d88SAlan Cox 	fl = (*seg->free_queues)[m_ret->pool];
94711752d88SAlan Cox 	vm_phys_split_pages(m_ret, oind, fl, order);
9485c1f2cc4SAlan Cox 	/* Return excess pages to the free lists. */
9495c1f2cc4SAlan Cox 	npages_end = roundup2(npages, 1 << imin(oind, order));
9505c1f2cc4SAlan Cox 	if (npages < npages_end)
9515c1f2cc4SAlan Cox 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
95211752d88SAlan Cox 	return (m_ret);
95311752d88SAlan Cox }
95411752d88SAlan Cox 
#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 *
 * For every domain and every free list, print a table with one row per
 * order, largest order first, and one column per pool.  Each cell is the
 * "lcnt" field of the corresponding free queue.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	int dom, flind, oind, pind;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			/* Table header: one column per pool. */
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			/* Table body: one row per order. */
			oind = VM_NFREEORDER - 1;
			while (oind >= 0) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					db_printf("  |  %6.6d",
					    vm_phys_free_queues[dom][flind][pind][oind].lcnt);
				}
				db_printf("\n");
				oind--;
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif
991