160727d8bSWarner Losh /*-
2*4d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni *
424964514SPeter Wemm * Copyright (c) 2000 Peter Wemm
524964514SPeter Wemm *
624964514SPeter Wemm * Redistribution and use in source and binary forms, with or without
724964514SPeter Wemm * modification, are permitted provided that the following conditions
824964514SPeter Wemm * are met:
924964514SPeter Wemm * 1. Redistributions of source code must retain the above copyright
1024964514SPeter Wemm * notice, this list of conditions and the following disclaimer.
1124964514SPeter Wemm * 2. Redistributions in binary form must reproduce the above copyright
1224964514SPeter Wemm * notice, this list of conditions and the following disclaimer in the
1324964514SPeter Wemm * documentation and/or other materials provided with the distribution.
1424964514SPeter Wemm *
1524964514SPeter Wemm * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
1624964514SPeter Wemm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1724964514SPeter Wemm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1824964514SPeter Wemm * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
1924964514SPeter Wemm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2024964514SPeter Wemm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2124964514SPeter Wemm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2224964514SPeter Wemm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2324964514SPeter Wemm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2424964514SPeter Wemm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2524964514SPeter Wemm * SUCH DAMAGE.
2624964514SPeter Wemm */
2724964514SPeter Wemm
2824964514SPeter Wemm #include <sys/param.h>
2924964514SPeter Wemm #include <sys/systm.h>
3024964514SPeter Wemm #include <sys/conf.h>
31a9fa2c05SAlfred Perlstein #include <sys/kernel.h>
32fb919e4dSMark Murray #include <sys/lock.h>
330cddd8f0SMatthew Dillon #include <sys/proc.h>
34fb919e4dSMark Murray #include <sys/mutex.h>
3524964514SPeter Wemm #include <sys/mman.h>
3689f6b863SAttilio Rao #include <sys/rwlock.h>
3724964514SPeter Wemm #include <sys/sysctl.h>
3800a3fe96SKonstantin Belousov #include <sys/user.h>
3924964514SPeter Wemm
4024964514SPeter Wemm #include <vm/vm.h>
41c7aebda8SAttilio Rao #include <vm/vm_param.h>
4224964514SPeter Wemm #include <vm/vm_object.h>
4324964514SPeter Wemm #include <vm/vm_page.h>
44ed01d989SKonstantin Belousov #include <vm/vm_pageout.h>
4524964514SPeter Wemm #include <vm/vm_pager.h>
4624964514SPeter Wemm
47248a0568SRemko Lodder /* list of phys pager objects */
4824964514SPeter Wemm static struct pagerlst phys_pager_object_list;
49a9fa2c05SAlfred Perlstein /* protect access to phys_pager_object_list */
50a9fa2c05SAlfred Perlstein static struct mtx phys_pager_mtx;
5124964514SPeter Wemm
52a720b31cSKonstantin Belousov static int default_phys_pager_getpages(vm_object_t object, vm_page_t *m,
53a720b31cSKonstantin Belousov int count, int *rbehind, int *rahead);
54a720b31cSKonstantin Belousov static int default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
55a720b31cSKonstantin Belousov int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last);
56a720b31cSKonstantin Belousov static boolean_t default_phys_pager_haspage(vm_object_t object,
57a720b31cSKonstantin Belousov vm_pindex_t pindex, int *before, int *after);
58d474440aSKonstantin Belousov const struct phys_pager_ops default_phys_pg_ops = {
59a720b31cSKonstantin Belousov .phys_pg_getpages = default_phys_pager_getpages,
60a720b31cSKonstantin Belousov .phys_pg_populate = default_phys_pager_populate,
61a720b31cSKonstantin Belousov .phys_pg_haspage = default_phys_pager_haspage,
62a720b31cSKonstantin Belousov .phys_pg_ctor = NULL,
63a720b31cSKonstantin Belousov .phys_pg_dtor = NULL,
64a720b31cSKonstantin Belousov };
65a720b31cSKonstantin Belousov
6624964514SPeter Wemm static void
phys_pager_init(void)67bb663856SPeter Wemm phys_pager_init(void)
6824964514SPeter Wemm {
69bb663856SPeter Wemm
7024964514SPeter Wemm TAILQ_INIT(&phys_pager_object_list);
716008862bSJohn Baldwin mtx_init(&phys_pager_mtx, "phys_pager list", NULL, MTX_DEF);
7224964514SPeter Wemm }
7324964514SPeter Wemm
74a720b31cSKonstantin Belousov vm_object_t
phys_pager_allocate(void * handle,const struct phys_pager_ops * ops,void * data,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * cred)75d474440aSKonstantin Belousov phys_pager_allocate(void *handle, const struct phys_pager_ops *ops, void *data,
76a720b31cSKonstantin Belousov vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred)
7724964514SPeter Wemm {
78efe7553eSKonstantin Belousov vm_object_t object, object1;
792f7af3dbSAlan Cox vm_pindex_t pindex;
80a720b31cSKonstantin Belousov bool init;
8124964514SPeter Wemm
8224964514SPeter Wemm /*
8324964514SPeter Wemm * Offset should be page aligned.
8424964514SPeter Wemm */
8524964514SPeter Wemm if (foff & PAGE_MASK)
8624964514SPeter Wemm return (NULL);
8724964514SPeter Wemm
882f7af3dbSAlan Cox pindex = OFF_TO_IDX(foff + PAGE_MASK + size);
89a720b31cSKonstantin Belousov init = true;
9024964514SPeter Wemm
91b5861b34SAlfred Perlstein if (handle != NULL) {
92efe7553eSKonstantin Belousov mtx_lock(&phys_pager_mtx);
9324964514SPeter Wemm /*
9424964514SPeter Wemm * Look up pager, creating as necessary.
9524964514SPeter Wemm */
96efe7553eSKonstantin Belousov object1 = NULL;
9724964514SPeter Wemm object = vm_pager_object_lookup(&phys_pager_object_list, handle);
9824964514SPeter Wemm if (object == NULL) {
9924964514SPeter Wemm /*
10024964514SPeter Wemm * Allocate object and associate it with the pager.
10124964514SPeter Wemm */
102efe7553eSKonstantin Belousov mtx_unlock(&phys_pager_mtx);
103efe7553eSKonstantin Belousov object1 = vm_object_allocate(OBJT_PHYS, pindex);
104a9fa2c05SAlfred Perlstein mtx_lock(&phys_pager_mtx);
105efe7553eSKonstantin Belousov object = vm_pager_object_lookup(&phys_pager_object_list,
106efe7553eSKonstantin Belousov handle);
107efe7553eSKonstantin Belousov if (object != NULL) {
108efe7553eSKonstantin Belousov /*
109efe7553eSKonstantin Belousov * We raced with other thread while
110efe7553eSKonstantin Belousov * allocating object.
111efe7553eSKonstantin Belousov */
112efe7553eSKonstantin Belousov if (pindex > object->size)
113efe7553eSKonstantin Belousov object->size = pindex;
114a720b31cSKonstantin Belousov init = false;
115efe7553eSKonstantin Belousov } else {
116efe7553eSKonstantin Belousov object = object1;
117efe7553eSKonstantin Belousov object1 = NULL;
118efe7553eSKonstantin Belousov object->handle = handle;
119a720b31cSKonstantin Belousov object->un_pager.phys.ops = ops;
120a720b31cSKonstantin Belousov object->un_pager.phys.data_ptr = data;
121a720b31cSKonstantin Belousov if (ops->phys_pg_populate != NULL)
122ed01d989SKonstantin Belousov vm_object_set_flag(object, OBJ_POPULATE);
123272cc3c4SKonstantin Belousov TAILQ_INSERT_TAIL(&phys_pager_object_list,
124272cc3c4SKonstantin Belousov object, pager_object_list);
125efe7553eSKonstantin Belousov }
12624964514SPeter Wemm } else {
1272f7af3dbSAlan Cox if (pindex > object->size)
1282f7af3dbSAlan Cox object->size = pindex;
12924964514SPeter Wemm }
130efe7553eSKonstantin Belousov mtx_unlock(&phys_pager_mtx);
131efe7553eSKonstantin Belousov vm_object_deallocate(object1);
132b5861b34SAlfred Perlstein } else {
1332f7af3dbSAlan Cox object = vm_object_allocate(OBJT_PHYS, pindex);
134a720b31cSKonstantin Belousov object->un_pager.phys.ops = ops;
135a720b31cSKonstantin Belousov object->un_pager.phys.data_ptr = data;
136a720b31cSKonstantin Belousov if (ops->phys_pg_populate != NULL)
137ed01d989SKonstantin Belousov vm_object_set_flag(object, OBJ_POPULATE);
138b5861b34SAlfred Perlstein }
139a720b31cSKonstantin Belousov if (init && ops->phys_pg_ctor != NULL)
140a720b31cSKonstantin Belousov ops->phys_pg_ctor(object, prot, foff, cred);
14124964514SPeter Wemm
14224964514SPeter Wemm return (object);
14324964514SPeter Wemm }
14424964514SPeter Wemm
145a720b31cSKonstantin Belousov static vm_object_t
phys_pager_alloc(void * handle,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * ucred)146a720b31cSKonstantin Belousov phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
147a720b31cSKonstantin Belousov vm_ooffset_t foff, struct ucred *ucred)
148a720b31cSKonstantin Belousov {
149a720b31cSKonstantin Belousov return (phys_pager_allocate(handle, &default_phys_pg_ops, NULL,
150a720b31cSKonstantin Belousov size, prot, foff, ucred));
151a720b31cSKonstantin Belousov }
152a720b31cSKonstantin Belousov
15324964514SPeter Wemm static void
phys_pager_dealloc(vm_object_t object)154bb663856SPeter Wemm phys_pager_dealloc(vm_object_t object)
15524964514SPeter Wemm {
15624964514SPeter Wemm
157a9fa2c05SAlfred Perlstein if (object->handle != NULL) {
15889f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object);
159a9fa2c05SAlfred Perlstein mtx_lock(&phys_pager_mtx);
16024964514SPeter Wemm TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
161a9fa2c05SAlfred Perlstein mtx_unlock(&phys_pager_mtx);
16289f6b863SAttilio Rao VM_OBJECT_WLOCK(object);
163a9fa2c05SAlfred Perlstein }
164e735691bSJohn Baldwin object->type = OBJT_DEAD;
165a720b31cSKonstantin Belousov if (object->un_pager.phys.ops->phys_pg_dtor != NULL)
166a720b31cSKonstantin Belousov object->un_pager.phys.ops->phys_pg_dtor(object);
167a720b31cSKonstantin Belousov object->handle = NULL;
16824964514SPeter Wemm }
16924964514SPeter Wemm
17024964514SPeter Wemm /*
17124964514SPeter Wemm * Fill as many pages as vm_fault has allocated for us.
17224964514SPeter Wemm */
173e265f054SAlan Cox static int
default_phys_pager_getpages(vm_object_t object,vm_page_t * m,int count,int * rbehind,int * rahead)174a720b31cSKonstantin Belousov default_phys_pager_getpages(vm_object_t object, vm_page_t *m, int count,
175a720b31cSKonstantin Belousov int *rbehind, int *rahead)
176e265f054SAlan Cox {
177e265f054SAlan Cox int i;
178e265f054SAlan Cox
1798a3ef857SAlan Cox for (i = 0; i < count; i++) {
1800012f373SJeff Roberson if (vm_page_none_valid(m[i])) {
1818a3ef857SAlan Cox if ((m[i]->flags & PG_ZERO) == 0)
1828a3ef857SAlan Cox pmap_zero_page(m[i]);
1830012f373SJeff Roberson vm_page_valid(m[i]);
1848a3ef857SAlan Cox }
1850012f373SJeff Roberson KASSERT(vm_page_all_valid(m[i]),
1868a3ef857SAlan Cox ("phys_pager_getpages: partially valid page %p", m[i]));
18753f55a43SAlan Cox KASSERT(m[i]->dirty == 0,
18853f55a43SAlan Cox ("phys_pager_getpages: dirty page %p", m[i]));
18924964514SPeter Wemm }
190b0cd2017SGleb Smirnoff if (rbehind)
191b0cd2017SGleb Smirnoff *rbehind = 0;
192b0cd2017SGleb Smirnoff if (rahead)
193b0cd2017SGleb Smirnoff *rahead = 0;
19424964514SPeter Wemm return (VM_PAGER_OK);
19524964514SPeter Wemm }
19624964514SPeter Wemm
197a720b31cSKonstantin Belousov static int
phys_pager_getpages(vm_object_t object,vm_page_t * m,int count,int * rbehind,int * rahead)198a720b31cSKonstantin Belousov phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
199a720b31cSKonstantin Belousov int *rahead)
200a720b31cSKonstantin Belousov {
201a720b31cSKonstantin Belousov return (object->un_pager.phys.ops->phys_pg_getpages(object, m,
202a720b31cSKonstantin Belousov count, rbehind, rahead));
203a720b31cSKonstantin Belousov }
204a720b31cSKonstantin Belousov
205ed01d989SKonstantin Belousov /*
206ed01d989SKonstantin Belousov * Implement a pretty aggressive clustered getpages strategy. Hint that
207ed01d989SKonstantin Belousov * everything in an entire 4MB window should be prefaulted at once.
208ed01d989SKonstantin Belousov *
209ed01d989SKonstantin Belousov * 4MB (1024 slots per page table page) is convenient for x86,
210ed01d989SKonstantin Belousov * but may not be for other arches.
211ed01d989SKonstantin Belousov */
212ed01d989SKonstantin Belousov #ifndef PHYSCLUSTER
213ed01d989SKonstantin Belousov #define PHYSCLUSTER 1024
214ed01d989SKonstantin Belousov #endif
215ed01d989SKonstantin Belousov static int phys_pager_cluster = PHYSCLUSTER;
216ed01d989SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, phys_pager_cluster, CTLFLAG_RWTUN,
217ed01d989SKonstantin Belousov &phys_pager_cluster, 0,
218ed01d989SKonstantin Belousov "prefault window size for phys pager");
219ed01d989SKonstantin Belousov
220ed01d989SKonstantin Belousov /*
221ed01d989SKonstantin Belousov * Max hint to vm_page_alloc() about the further allocation needs
222ed01d989SKonstantin Belousov * inside the phys_pager_populate() loop. The number of bits used to
223ed01d989SKonstantin Belousov * implement VM_ALLOC_COUNT() determines the hard limit on this value.
224ed01d989SKonstantin Belousov * That limit is currently 65535.
225ed01d989SKonstantin Belousov */
226ed01d989SKonstantin Belousov #define PHYSALLOC 16
227ed01d989SKonstantin Belousov
228ed01d989SKonstantin Belousov static int
default_phys_pager_populate(vm_object_t object,vm_pindex_t pidx,int fault_type __unused,vm_prot_t max_prot __unused,vm_pindex_t * first,vm_pindex_t * last)229a720b31cSKonstantin Belousov default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
230ed01d989SKonstantin Belousov int fault_type __unused, vm_prot_t max_prot __unused, vm_pindex_t *first,
231ed01d989SKonstantin Belousov vm_pindex_t *last)
232ed01d989SKonstantin Belousov {
233ed01d989SKonstantin Belousov vm_page_t m;
234ed01d989SKonstantin Belousov vm_pindex_t base, end, i;
235ed01d989SKonstantin Belousov int ahead;
236ed01d989SKonstantin Belousov
237ed01d989SKonstantin Belousov base = rounddown(pidx, phys_pager_cluster);
238ed01d989SKonstantin Belousov end = base + phys_pager_cluster - 1;
239ed01d989SKonstantin Belousov if (end >= object->size)
240ed01d989SKonstantin Belousov end = object->size - 1;
241ed01d989SKonstantin Belousov if (*first > base)
242ed01d989SKonstantin Belousov base = *first;
243ed01d989SKonstantin Belousov if (end > *last)
244ed01d989SKonstantin Belousov end = *last;
245ed01d989SKonstantin Belousov *first = base;
246ed01d989SKonstantin Belousov *last = end;
247ed01d989SKonstantin Belousov
248ed01d989SKonstantin Belousov for (i = base; i <= end; i++) {
249ed01d989SKonstantin Belousov ahead = MIN(end - i, PHYSALLOC);
25063e97555SJeff Roberson m = vm_page_grab(object, i,
25163e97555SJeff Roberson VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead));
2520012f373SJeff Roberson if (!vm_page_all_valid(m))
253ed01d989SKonstantin Belousov vm_page_zero_invalid(m, TRUE);
254ed01d989SKonstantin Belousov KASSERT(m->dirty == 0,
255ed01d989SKonstantin Belousov ("phys_pager_populate: dirty page %p", m));
256ed01d989SKonstantin Belousov }
257ed01d989SKonstantin Belousov return (VM_PAGER_OK);
258ed01d989SKonstantin Belousov }
259ed01d989SKonstantin Belousov
260a720b31cSKonstantin Belousov static int
phys_pager_populate(vm_object_t object,vm_pindex_t pidx,int fault_type,vm_prot_t max_prot,vm_pindex_t * first,vm_pindex_t * last)261a720b31cSKonstantin Belousov phys_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type,
262a720b31cSKonstantin Belousov vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
263a720b31cSKonstantin Belousov {
264a720b31cSKonstantin Belousov return (object->un_pager.phys.ops->phys_pg_populate(object, pidx,
265a720b31cSKonstantin Belousov fault_type, max_prot, first, last));
266a720b31cSKonstantin Belousov }
267a720b31cSKonstantin Belousov
26824964514SPeter Wemm static void
phys_pager_putpages(vm_object_t object,vm_page_t * m,int count,int flags,int * rtvals)269f74be55eSDimitry Andric phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
270bb663856SPeter Wemm int *rtvals)
27124964514SPeter Wemm {
272bb663856SPeter Wemm
27324964514SPeter Wemm panic("phys_pager_putpage called");
27424964514SPeter Wemm }
27524964514SPeter Wemm
27624964514SPeter Wemm static boolean_t
default_phys_pager_haspage(vm_object_t object,vm_pindex_t pindex,int * before,int * after)277a720b31cSKonstantin Belousov default_phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
278bb663856SPeter Wemm int *after)
27924964514SPeter Wemm {
28024964514SPeter Wemm vm_pindex_t base, end;
28124964514SPeter Wemm
282ed01d989SKonstantin Belousov base = rounddown(pindex, phys_pager_cluster);
283ed01d989SKonstantin Belousov end = base + phys_pager_cluster - 1;
28424964514SPeter Wemm if (before != NULL)
28524964514SPeter Wemm *before = pindex - base;
28624964514SPeter Wemm if (after != NULL)
28724964514SPeter Wemm *after = end - pindex;
28824964514SPeter Wemm return (TRUE);
28924964514SPeter Wemm }
290bb663856SPeter Wemm
291a720b31cSKonstantin Belousov static boolean_t
phys_pager_haspage(vm_object_t object,vm_pindex_t pindex,int * before,int * after)292a720b31cSKonstantin Belousov phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
293a720b31cSKonstantin Belousov int *after)
294a720b31cSKonstantin Belousov {
295a720b31cSKonstantin Belousov return (object->un_pager.phys.ops->phys_pg_haspage(object, pindex,
296a720b31cSKonstantin Belousov before, after));
297a720b31cSKonstantin Belousov }
298a720b31cSKonstantin Belousov
299d474440aSKonstantin Belousov const struct pagerops physpagerops = {
30000a3fe96SKonstantin Belousov .pgo_kvme_type = KVME_TYPE_PHYS,
3014e658600SPoul-Henning Kamp .pgo_init = phys_pager_init,
3024e658600SPoul-Henning Kamp .pgo_alloc = phys_pager_alloc,
3034e658600SPoul-Henning Kamp .pgo_dealloc = phys_pager_dealloc,
3044e658600SPoul-Henning Kamp .pgo_getpages = phys_pager_getpages,
3054e658600SPoul-Henning Kamp .pgo_putpages = phys_pager_putpages,
3064e658600SPoul-Henning Kamp .pgo_haspage = phys_pager_haspage,
307ed01d989SKonstantin Belousov .pgo_populate = phys_pager_populate,
308bb663856SPeter Wemm };
309