xref: /freebsd/sys/vm/phys_pager.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
160727d8bSWarner Losh /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni  *
424964514SPeter Wemm  * Copyright (c) 2000 Peter Wemm
524964514SPeter Wemm  *
624964514SPeter Wemm  * Redistribution and use in source and binary forms, with or without
724964514SPeter Wemm  * modification, are permitted provided that the following conditions
824964514SPeter Wemm  * are met:
924964514SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
1024964514SPeter Wemm  *    notice, this list of conditions and the following disclaimer.
1124964514SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
1224964514SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
1324964514SPeter Wemm  *    documentation and/or other materials provided with the distribution.
1424964514SPeter Wemm  *
1524964514SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
1624964514SPeter Wemm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1724964514SPeter Wemm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1824964514SPeter Wemm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
1924964514SPeter Wemm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2024964514SPeter Wemm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2124964514SPeter Wemm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2224964514SPeter Wemm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2324964514SPeter Wemm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2424964514SPeter Wemm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2524964514SPeter Wemm  * SUCH DAMAGE.
2624964514SPeter Wemm  */
2724964514SPeter Wemm 
2824964514SPeter Wemm #include <sys/param.h>
2924964514SPeter Wemm #include <sys/systm.h>
3024964514SPeter Wemm #include <sys/conf.h>
31a9fa2c05SAlfred Perlstein #include <sys/kernel.h>
32fb919e4dSMark Murray #include <sys/lock.h>
330cddd8f0SMatthew Dillon #include <sys/proc.h>
34fb919e4dSMark Murray #include <sys/mutex.h>
3524964514SPeter Wemm #include <sys/mman.h>
3689f6b863SAttilio Rao #include <sys/rwlock.h>
3724964514SPeter Wemm #include <sys/sysctl.h>
3800a3fe96SKonstantin Belousov #include <sys/user.h>
3924964514SPeter Wemm 
4024964514SPeter Wemm #include <vm/vm.h>
41c7aebda8SAttilio Rao #include <vm/vm_param.h>
4224964514SPeter Wemm #include <vm/vm_object.h>
4324964514SPeter Wemm #include <vm/vm_page.h>
44ed01d989SKonstantin Belousov #include <vm/vm_pageout.h>
4524964514SPeter Wemm #include <vm/vm_pager.h>
4624964514SPeter Wemm 
/* List of phys pager objects (only those created with a non-NULL handle). */
static struct pagerlst phys_pager_object_list;
/* Protects access to phys_pager_object_list. */
static struct mtx phys_pager_mtx;

static int default_phys_pager_getpages(vm_object_t object, vm_page_t *m,
    int count, int *rbehind, int *rahead);
static int default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
    int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last);
static boolean_t default_phys_pager_haspage(vm_object_t object,
    vm_pindex_t pindex, int *before, int *after);

/*
 * Default operations vector for OBJT_PHYS objects created without a
 * private ops vector: zero-fill pages on demand, no ctor/dtor hooks.
 */
const struct phys_pager_ops default_phys_pg_ops = {
	.phys_pg_getpages = default_phys_pager_getpages,
	.phys_pg_populate = default_phys_pager_populate,
	.phys_pg_haspage = default_phys_pager_haspage,
	.phys_pg_ctor = NULL,
	.phys_pg_dtor = NULL,
};
65a720b31cSKonstantin Belousov 
6624964514SPeter Wemm static void
phys_pager_init(void)67bb663856SPeter Wemm phys_pager_init(void)
6824964514SPeter Wemm {
69bb663856SPeter Wemm 
7024964514SPeter Wemm 	TAILQ_INIT(&phys_pager_object_list);
716008862bSJohn Baldwin 	mtx_init(&phys_pager_mtx, "phys_pager list", NULL, MTX_DEF);
7224964514SPeter Wemm }
7324964514SPeter Wemm 
/*
 * Create or look up an OBJT_PHYS object.
 *
 * If "handle" is non-NULL the object is shared: it is entered into
 * phys_pager_object_list, and later calls with the same handle return
 * the same object (growing its size if needed).  "ops" supplies the
 * pager operations and "data" is stored for their private use.
 *
 * Returns NULL if "foff" is not page-aligned.
 */
vm_object_t
phys_pager_allocate(void *handle, const struct phys_pager_ops *ops, void *data,
    vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred)
{
	vm_object_t object, object1;
	vm_pindex_t pindex;
	bool init;	/* run the ctor only on a freshly created object */

	/*
	 * Offset should be page aligned.
	 */
	if (foff & PAGE_MASK)
		return (NULL);

	pindex = OFF_TO_IDX(foff + PAGE_MASK + size);
	init = true;

	if (handle != NULL) {
		mtx_lock(&phys_pager_mtx);
		/*
		 * Look up pager, creating as necessary.
		 */
		object1 = NULL;
		object = vm_pager_object_lookup(&phys_pager_object_list, handle);
		if (object == NULL) {
			/*
			 * Allocate object and associate it with the pager.
			 * The mutex is dropped across the allocation, so the
			 * lookup must be repeated afterwards to detect a
			 * racing creation by another thread.
			 */
			mtx_unlock(&phys_pager_mtx);
			object1 = vm_object_allocate(OBJT_PHYS, pindex);
			mtx_lock(&phys_pager_mtx);
			object = vm_pager_object_lookup(&phys_pager_object_list,
			    handle);
			if (object != NULL) {
				/*
				 * We raced with other thread while
				 * allocating object.
				 */
				if (pindex > object->size)
					object->size = pindex;
				init = false;
			} else {
				/* We won the race; publish our object. */
				object = object1;
				object1 = NULL;
				object->handle = handle;
				object->un_pager.phys.ops = ops;
				object->un_pager.phys.data_ptr = data;
				if (ops->phys_pg_populate != NULL)
					vm_object_set_flag(object, OBJ_POPULATE);
				TAILQ_INSERT_TAIL(&phys_pager_object_list,
				    object, pager_object_list);
			}
		} else {
			/* Existing object: extend it to cover the new size. */
			if (pindex > object->size)
				object->size = pindex;
		}
		mtx_unlock(&phys_pager_mtx);
		/* Release the loser of the race, if any (NULL is a no-op). */
		vm_object_deallocate(object1);
	} else {
		/* Anonymous object: never entered into the shared list. */
		object = vm_object_allocate(OBJT_PHYS, pindex);
		object->un_pager.phys.ops = ops;
		object->un_pager.phys.data_ptr = data;
		if (ops->phys_pg_populate != NULL)
			vm_object_set_flag(object, OBJ_POPULATE);
	}
	if (init && ops->phys_pg_ctor != NULL)
		ops->phys_pg_ctor(object, prot, foff, cred);

	return (object);
}
14424964514SPeter Wemm 
145a720b31cSKonstantin Belousov static vm_object_t
phys_pager_alloc(void * handle,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * ucred)146a720b31cSKonstantin Belousov phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
147a720b31cSKonstantin Belousov     vm_ooffset_t foff, struct ucred *ucred)
148a720b31cSKonstantin Belousov {
149a720b31cSKonstantin Belousov 	return (phys_pager_allocate(handle, &default_phys_pg_ops, NULL,
150a720b31cSKonstantin Belousov 	    size, prot, foff, ucred));
151a720b31cSKonstantin Belousov }
152a720b31cSKonstantin Belousov 
/*
 * Tear down a phys pager object: unlink it from the shared-object list
 * (if it was created with a handle) and run the optional per-ops
 * destructor.  Called with the object write-locked.
 *
 * NOTE(review): the object lock is dropped around the list removal —
 * presumably to honor a phys_pager_mtx-before-object-lock ordering;
 * confirm against the allocation path before relying on it.
 */
static void
phys_pager_dealloc(vm_object_t object)
{

	if (object->handle != NULL) {
		VM_OBJECT_WUNLOCK(object);
		mtx_lock(&phys_pager_mtx);
		TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
		mtx_unlock(&phys_pager_mtx);
		VM_OBJECT_WLOCK(object);
	}
	/* Mark dead before invoking the dtor, then drop the handle. */
	object->type = OBJT_DEAD;
	if (object->un_pager.phys.ops->phys_pg_dtor != NULL)
		object->un_pager.phys.ops->phys_pg_dtor(object);
	object->handle = NULL;
}
16924964514SPeter Wemm 
17024964514SPeter Wemm /*
17124964514SPeter Wemm  * Fill as many pages as vm_fault has allocated for us.
17224964514SPeter Wemm  */
173e265f054SAlan Cox static int
default_phys_pager_getpages(vm_object_t object,vm_page_t * m,int count,int * rbehind,int * rahead)174a720b31cSKonstantin Belousov default_phys_pager_getpages(vm_object_t object, vm_page_t *m, int count,
175a720b31cSKonstantin Belousov     int *rbehind, int *rahead)
176e265f054SAlan Cox {
177e265f054SAlan Cox 	int i;
178e265f054SAlan Cox 
1798a3ef857SAlan Cox 	for (i = 0; i < count; i++) {
1800012f373SJeff Roberson 		if (vm_page_none_valid(m[i])) {
1818a3ef857SAlan Cox 			if ((m[i]->flags & PG_ZERO) == 0)
1828a3ef857SAlan Cox 				pmap_zero_page(m[i]);
1830012f373SJeff Roberson 			vm_page_valid(m[i]);
1848a3ef857SAlan Cox 		}
1850012f373SJeff Roberson 		KASSERT(vm_page_all_valid(m[i]),
1868a3ef857SAlan Cox 		    ("phys_pager_getpages: partially valid page %p", m[i]));
18753f55a43SAlan Cox 		KASSERT(m[i]->dirty == 0,
18853f55a43SAlan Cox 		    ("phys_pager_getpages: dirty page %p", m[i]));
18924964514SPeter Wemm 	}
190b0cd2017SGleb Smirnoff 	if (rbehind)
191b0cd2017SGleb Smirnoff 		*rbehind = 0;
192b0cd2017SGleb Smirnoff 	if (rahead)
193b0cd2017SGleb Smirnoff 		*rahead = 0;
19424964514SPeter Wemm 	return (VM_PAGER_OK);
19524964514SPeter Wemm }
19624964514SPeter Wemm 
197a720b31cSKonstantin Belousov static int
phys_pager_getpages(vm_object_t object,vm_page_t * m,int count,int * rbehind,int * rahead)198a720b31cSKonstantin Belousov phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
199a720b31cSKonstantin Belousov     int *rahead)
200a720b31cSKonstantin Belousov {
201a720b31cSKonstantin Belousov 	return (object->un_pager.phys.ops->phys_pg_getpages(object, m,
202a720b31cSKonstantin Belousov 	    count, rbehind, rahead));
203a720b31cSKonstantin Belousov }
204a720b31cSKonstantin Belousov 
/*
 * Implement a pretty aggressive clustered getpages strategy.  Hint that
 * everything in an entire 4MB window should be prefaulted at once.
 *
 * 4MB (1024 slots per page table page) is convenient for x86,
 * but may not be for other arches.
 */
#ifndef PHYSCLUSTER
#define PHYSCLUSTER 1024
#endif
/* Prefault window size in pages; tunable via vm.phys_pager_cluster. */
static int phys_pager_cluster = PHYSCLUSTER;
SYSCTL_INT(_vm, OID_AUTO, phys_pager_cluster, CTLFLAG_RWTUN,
    &phys_pager_cluster, 0,
    "prefault window size for phys pager");

/*
 * Max hint to vm_page_alloc() about the further allocation needs
 * inside the phys_pager_populate() loop.  The number of bits used to
 * implement VM_ALLOC_COUNT() determines the hard limit on this value.
 * That limit is currently 65535.
 */
#define	PHYSALLOC	16
227ed01d989SKonstantin Belousov 
/*
 * Populate a range of pages around the faulting index "pidx".  The
 * window starts as the phys_pager_cluster-aligned block containing
 * pidx, is clipped to the object size and then to the caller-supplied
 * [*first, *last] bounds; the final range is written back through
 * "first"/"last".  Each page in the range is grabbed and zero-filled
 * if not already fully valid.  Always returns VM_PAGER_OK.
 */
static int
default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
    int fault_type __unused, vm_prot_t max_prot __unused, vm_pindex_t *first,
    vm_pindex_t *last)
{
	vm_page_t m;
	vm_pindex_t base, end, i;
	int ahead;

	/* Aligned prefault window around pidx, clipped to the object. */
	base = rounddown(pidx, phys_pager_cluster);
	end = base + phys_pager_cluster - 1;
	if (end >= object->size)
		end = object->size - 1;
	/* Clip further to the caller-permitted range. */
	if (*first > base)
		base = *first;
	if (end > *last)
		end = *last;
	*first = base;
	*last = end;

	for (i = base; i <= end; i++) {
		/* Hint how many more pages this loop will still need. */
		ahead = MIN(end - i, PHYSALLOC);
		m = vm_page_grab(object, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead));
		if (!vm_page_all_valid(m))
			vm_page_zero_invalid(m, TRUE);
		KASSERT(m->dirty == 0,
		    ("phys_pager_populate: dirty page %p", m));
	}
	return (VM_PAGER_OK);
}
259ed01d989SKonstantin Belousov 
260a720b31cSKonstantin Belousov static int
phys_pager_populate(vm_object_t object,vm_pindex_t pidx,int fault_type,vm_prot_t max_prot,vm_pindex_t * first,vm_pindex_t * last)261a720b31cSKonstantin Belousov phys_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type,
262a720b31cSKonstantin Belousov     vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
263a720b31cSKonstantin Belousov {
264a720b31cSKonstantin Belousov 	return (object->un_pager.phys.ops->phys_pg_populate(object, pidx,
265a720b31cSKonstantin Belousov 	    fault_type, max_prot, first, last));
266a720b31cSKonstantin Belousov }
267a720b31cSKonstantin Belousov 
/*
 * A phys object is never paged out; reaching this entry point is a bug.
 */
static void
phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
    int *rtvals)
{

	panic("phys_pager_putpage called");
}
27524964514SPeter Wemm 
27624964514SPeter Wemm static boolean_t
default_phys_pager_haspage(vm_object_t object,vm_pindex_t pindex,int * before,int * after)277a720b31cSKonstantin Belousov default_phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
278bb663856SPeter Wemm     int *after)
27924964514SPeter Wemm {
28024964514SPeter Wemm 	vm_pindex_t base, end;
28124964514SPeter Wemm 
282ed01d989SKonstantin Belousov 	base = rounddown(pindex, phys_pager_cluster);
283ed01d989SKonstantin Belousov 	end = base + phys_pager_cluster - 1;
28424964514SPeter Wemm 	if (before != NULL)
28524964514SPeter Wemm 		*before = pindex - base;
28624964514SPeter Wemm 	if (after != NULL)
28724964514SPeter Wemm 		*after = end - pindex;
28824964514SPeter Wemm 	return (TRUE);
28924964514SPeter Wemm }
290bb663856SPeter Wemm 
291a720b31cSKonstantin Belousov static boolean_t
phys_pager_haspage(vm_object_t object,vm_pindex_t pindex,int * before,int * after)292a720b31cSKonstantin Belousov phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
293a720b31cSKonstantin Belousov     int *after)
294a720b31cSKonstantin Belousov {
295a720b31cSKonstantin Belousov 	return (object->un_pager.phys.ops->phys_pg_haspage(object, pindex,
296a720b31cSKonstantin Belousov 	    before, after));
297a720b31cSKonstantin Belousov }
298a720b31cSKonstantin Belousov 
/* Pager operations vector registered for OBJT_PHYS objects. */
const struct pagerops physpagerops = {
	.pgo_kvme_type = KVME_TYPE_PHYS,
	.pgo_init =	phys_pager_init,
	.pgo_alloc =	phys_pager_alloc,
	.pgo_dealloc = 	phys_pager_dealloc,
	.pgo_getpages =	phys_pager_getpages,
	.pgo_putpages =	phys_pager_putpages,
	.pgo_haspage =	phys_pager_haspage,
	.pgo_populate =	phys_pager_populate,
};
309