xref: /freebsd/sys/vm/vm_domainset.c (revision 17fbf3cf3420c23aaa7967e9373eaec16133520b)
17b11a483SJeff Roberson /*-
27b11a483SJeff Roberson  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
37b11a483SJeff Roberson  *
47b11a483SJeff Roberson  * Copyright (c) 2017,	Jeffrey Roberson <jeff@freebsd.org>
57b11a483SJeff Roberson  * All rights reserved.
67b11a483SJeff Roberson  *
77b11a483SJeff Roberson  * Redistribution and use in source and binary forms, with or without
87b11a483SJeff Roberson  * modification, are permitted provided that the following conditions
97b11a483SJeff Roberson  * are met:
107b11a483SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
117b11a483SJeff Roberson  *    notice unmodified, this list of conditions, and the following
127b11a483SJeff Roberson  *    disclaimer.
137b11a483SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
147b11a483SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
157b11a483SJeff Roberson  *    documentation and/or other materials provided with the distribution.
167b11a483SJeff Roberson  *
177b11a483SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
187b11a483SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
197b11a483SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
207b11a483SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
217b11a483SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
227b11a483SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
237b11a483SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
247b11a483SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
257b11a483SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
267b11a483SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
277b11a483SJeff Roberson  *
287b11a483SJeff Roberson  */
297b11a483SJeff Roberson 
307b11a483SJeff Roberson #include <sys/cdefs.h>
317b11a483SJeff Roberson __FBSDID("$FreeBSD$");
327b11a483SJeff Roberson 
337b11a483SJeff Roberson #include "opt_vm.h"
347b11a483SJeff Roberson 
357b11a483SJeff Roberson #include <sys/param.h>
367b11a483SJeff Roberson #include <sys/systm.h>
377b11a483SJeff Roberson #include <sys/bitset.h>
387b11a483SJeff Roberson #include <sys/domainset.h>
397b11a483SJeff Roberson #include <sys/proc.h>
407b11a483SJeff Roberson #include <sys/lock.h>
417b11a483SJeff Roberson #include <sys/mutex.h>
427b11a483SJeff Roberson #include <sys/malloc.h>
434c29d2deSMark Johnston #include <sys/rwlock.h>
447b11a483SJeff Roberson #include <sys/vmmeter.h>
457b11a483SJeff Roberson 
467b11a483SJeff Roberson #include <vm/vm.h>
477b11a483SJeff Roberson #include <vm/vm_param.h>
487b11a483SJeff Roberson #include <vm/vm_domainset.h>
497b11a483SJeff Roberson #include <vm/vm_object.h>
507b11a483SJeff Roberson #include <vm/vm_page.h>
517b11a483SJeff Roberson #include <vm/vm_phys.h>
527b11a483SJeff Roberson 
53b6715dabSJeff Roberson #ifdef NUMA
547b11a483SJeff Roberson /*
557b11a483SJeff Roberson  * Iterators are written such that the first nowait pass has as short a
567b11a483SJeff Roberson  * codepath as possible to eliminate bloat from the allocator.  It is
577b11a483SJeff Roberson  * assumed that most allocations are successful.
587b11a483SJeff Roberson  */
597b11a483SJeff Roberson 
60e5818a53SJeff Roberson static int vm_domainset_default_stride = 64;
61e5818a53SJeff Roberson 
627b11a483SJeff Roberson /*
637b11a483SJeff Roberson  * Determine which policy is to be used for this allocation.
647b11a483SJeff Roberson  */
657b11a483SJeff Roberson static void
664c29d2deSMark Johnston vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
674c29d2deSMark Johnston     int *iter, struct vm_object *obj, vm_pindex_t pindex)
687b11a483SJeff Roberson {
697b11a483SJeff Roberson 
704c29d2deSMark Johnston 	di->di_domain = ds;
714c29d2deSMark Johnston 	di->di_iter = iter;
724c29d2deSMark Johnston 	di->di_policy = ds->ds_policy;
73e5818a53SJeff Roberson 	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
7423d123c6SMark Johnston #if VM_NRESERVLEVEL > 0
75e5818a53SJeff Roberson 		if (vm_object_reserv(obj)) {
76e5818a53SJeff Roberson 			/*
77e5818a53SJeff Roberson 			 * Color the pindex so we end up on the correct
78e5818a53SJeff Roberson 			 * reservation boundary.
79e5818a53SJeff Roberson 			 */
80e5818a53SJeff Roberson 			pindex += obj->pg_color;
81e5818a53SJeff Roberson 			pindex >>= VM_LEVEL_0_ORDER;
8223d123c6SMark Johnston 		} else
8373e37d1dSMatt Macy #endif
84e5818a53SJeff Roberson 			pindex /= vm_domainset_default_stride;
85e5818a53SJeff Roberson 		/*
86e5818a53SJeff Roberson 		 * Offset pindex so the first page of each object does
87e5818a53SJeff Roberson 		 * not end up in domain 0.
88e5818a53SJeff Roberson 		 */
89e5818a53SJeff Roberson 		if (obj != NULL)
90e5818a53SJeff Roberson 			pindex += (((uintptr_t)obj) / sizeof(*obj));
91e5818a53SJeff Roberson 		di->di_offset = pindex;
92e5818a53SJeff Roberson 	}
93c56c7299SMark Johnston 	/* Skip domains below min on the first pass. */
9423984ce5SMark Johnston 	di->di_minskip = true;
957b11a483SJeff Roberson }
967b11a483SJeff Roberson 
977b11a483SJeff Roberson static void
987b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
997b11a483SJeff Roberson {
1007b11a483SJeff Roberson 
101e5818a53SJeff Roberson 	*domain = di->di_domain->ds_order[
102e5818a53SJeff Roberson 	    ++(*di->di_iter) % di->di_domain->ds_cnt];
1037b11a483SJeff Roberson }
1047b11a483SJeff Roberson 
1057b11a483SJeff Roberson static void
1067b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
1077b11a483SJeff Roberson {
1087b11a483SJeff Roberson 	int d;
1097b11a483SJeff Roberson 
1107b11a483SJeff Roberson 	do {
111e5818a53SJeff Roberson 		d = di->di_domain->ds_order[
112e5818a53SJeff Roberson 		    ++(*di->di_iter) % di->di_domain->ds_cnt];
113e5818a53SJeff Roberson 	} while (d == di->di_domain->ds_prefer);
114e5818a53SJeff Roberson 	*domain = d;
115e5818a53SJeff Roberson }
116e5818a53SJeff Roberson 
117e5818a53SJeff Roberson static void
118e5818a53SJeff Roberson vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
119e5818a53SJeff Roberson {
120e5818a53SJeff Roberson 	int d;
121e5818a53SJeff Roberson 
122e5818a53SJeff Roberson 	d = di->di_offset % di->di_domain->ds_cnt;
123e5818a53SJeff Roberson 	*di->di_iter = d;
124e5818a53SJeff Roberson 	*domain = di->di_domain->ds_order[d];
1257b11a483SJeff Roberson }
1267b11a483SJeff Roberson 
1277b11a483SJeff Roberson static void
1287b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
1297b11a483SJeff Roberson {
1307b11a483SJeff Roberson 
1317b11a483SJeff Roberson 	KASSERT(di->di_n > 0,
1327b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
133e5818a53SJeff Roberson 	switch (di->di_policy) {
1347b11a483SJeff Roberson 	case DOMAINSET_POLICY_FIRSTTOUCH:
1357b11a483SJeff Roberson 		/*
1367b11a483SJeff Roberson 		 * To prevent impossible allocations we convert an invalid
1377b11a483SJeff Roberson 		 * first-touch to round-robin.
1387b11a483SJeff Roberson 		 */
1397b11a483SJeff Roberson 		/* FALLTHROUGH */
140e5818a53SJeff Roberson 	case DOMAINSET_POLICY_INTERLEAVE:
141e5818a53SJeff Roberson 		/* FALLTHROUGH */
1427b11a483SJeff Roberson 	case DOMAINSET_POLICY_ROUNDROBIN:
1437b11a483SJeff Roberson 		vm_domainset_iter_rr(di, domain);
1447b11a483SJeff Roberson 		break;
1457b11a483SJeff Roberson 	case DOMAINSET_POLICY_PREFER:
1467b11a483SJeff Roberson 		vm_domainset_iter_prefer(di, domain);
1477b11a483SJeff Roberson 		break;
1487b11a483SJeff Roberson 	default:
1497b11a483SJeff Roberson 		panic("vm_domainset_iter_first: Unknown policy %d",
150e5818a53SJeff Roberson 		    di->di_policy);
1517b11a483SJeff Roberson 	}
1527b11a483SJeff Roberson 	KASSERT(*domain < vm_ndomains,
1537b11a483SJeff Roberson 	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
1547b11a483SJeff Roberson }
1557b11a483SJeff Roberson 
1567b11a483SJeff Roberson static void
1577b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
1587b11a483SJeff Roberson {
1597b11a483SJeff Roberson 
160e5818a53SJeff Roberson 	switch (di->di_policy) {
1617b11a483SJeff Roberson 	case DOMAINSET_POLICY_FIRSTTOUCH:
1627b11a483SJeff Roberson 		*domain = PCPU_GET(domain);
1637b11a483SJeff Roberson 		if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
164e5818a53SJeff Roberson 			/*
165e5818a53SJeff Roberson 			 * Add an extra iteration because we will visit the
166e5818a53SJeff Roberson 			 * current domain a second time in the rr iterator.
167e5818a53SJeff Roberson 			 */
168e5818a53SJeff Roberson 			di->di_n = di->di_domain->ds_cnt + 1;
1697b11a483SJeff Roberson 			break;
1707b11a483SJeff Roberson 		}
1717b11a483SJeff Roberson 		/*
1727b11a483SJeff Roberson 		 * To prevent impossible allocations we convert an invalid
1737b11a483SJeff Roberson 		 * first-touch to round-robin.
1747b11a483SJeff Roberson 		 */
1757b11a483SJeff Roberson 		/* FALLTHROUGH */
1767b11a483SJeff Roberson 	case DOMAINSET_POLICY_ROUNDROBIN:
1777b11a483SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
1787b11a483SJeff Roberson 		vm_domainset_iter_rr(di, domain);
1797b11a483SJeff Roberson 		break;
1807b11a483SJeff Roberson 	case DOMAINSET_POLICY_PREFER:
1817b11a483SJeff Roberson 		*domain = di->di_domain->ds_prefer;
1827b11a483SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
1837b11a483SJeff Roberson 		break;
184e5818a53SJeff Roberson 	case DOMAINSET_POLICY_INTERLEAVE:
185e5818a53SJeff Roberson 		vm_domainset_iter_interleave(di, domain);
186e5818a53SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
187e5818a53SJeff Roberson 		break;
1887b11a483SJeff Roberson 	default:
1897b11a483SJeff Roberson 		panic("vm_domainset_iter_first: Unknown policy %d",
190e5818a53SJeff Roberson 		    di->di_policy);
1917b11a483SJeff Roberson 	}
1927b11a483SJeff Roberson 	KASSERT(di->di_n > 0,
1937b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
1947b11a483SJeff Roberson 	KASSERT(*domain < vm_ndomains,
1957b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
1967b11a483SJeff Roberson }
1977b11a483SJeff Roberson 
1987b11a483SJeff Roberson void
1997b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
200e5818a53SJeff Roberson     vm_pindex_t pindex, int *domain, int *req)
2017b11a483SJeff Roberson {
2024c29d2deSMark Johnston 	struct domainset_ref *dr;
2037b11a483SJeff Roberson 
2044c29d2deSMark Johnston 	/*
2054c29d2deSMark Johnston 	 * Object policy takes precedence over thread policy.  The policies
2064c29d2deSMark Johnston 	 * are immutable and unsynchronized.  Updates can race but pointer
2074c29d2deSMark Johnston 	 * loads are assumed to be atomic.
2084c29d2deSMark Johnston 	 */
2094c29d2deSMark Johnston 	if (obj != NULL && obj->domain.dr_policy != NULL)
2104c29d2deSMark Johnston 		dr = &obj->domain;
2114c29d2deSMark Johnston 	else
2124c29d2deSMark Johnston 		dr = &curthread->td_domain;
2134c29d2deSMark Johnston 	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
2147b11a483SJeff Roberson 	di->di_flags = *req;
2157b11a483SJeff Roberson 	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
2167b11a483SJeff Roberson 	    VM_ALLOC_NOWAIT;
2177b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
218463406acSMark Johnston 	if (vm_page_count_min_domain(*domain))
2194c29d2deSMark Johnston 		vm_domainset_iter_page(di, obj, domain);
2207b11a483SJeff Roberson }
2217b11a483SJeff Roberson 
2227b11a483SJeff Roberson int
2234c29d2deSMark Johnston vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
2244c29d2deSMark Johnston     int *domain)
2257b11a483SJeff Roberson {
2267b11a483SJeff Roberson 
2277b11a483SJeff Roberson 	/* If there are more domains to visit we run the iterator. */
22823984ce5SMark Johnston 	while (--di->di_n != 0) {
2297b11a483SJeff Roberson 		vm_domainset_iter_next(di, domain);
230463406acSMark Johnston 		if (!di->di_minskip || !vm_page_count_min_domain(*domain))
23123984ce5SMark Johnston 			return (0);
23223984ce5SMark Johnston 	}
2334c29d2deSMark Johnston 
2344c29d2deSMark Johnston 	/* If we skipped domains below min restart the search. */
23523984ce5SMark Johnston 	if (di->di_minskip) {
23623984ce5SMark Johnston 		di->di_minskip = false;
23723984ce5SMark Johnston 		vm_domainset_iter_first(di, domain);
2387b11a483SJeff Roberson 		return (0);
2397b11a483SJeff Roberson 	}
2407b11a483SJeff Roberson 
2417b11a483SJeff Roberson 	/* If we visited all domains and this was a NOWAIT we return error. */
2427b11a483SJeff Roberson 	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
2437b11a483SJeff Roberson 		return (ENOMEM);
2447b11a483SJeff Roberson 
2454c29d2deSMark Johnston 	/* Wait for one of the domains to accumulate some free pages. */
2464c29d2deSMark Johnston 	if (obj != NULL)
2474c29d2deSMark Johnston 		VM_OBJECT_WUNLOCK(obj);
2484c29d2deSMark Johnston 	vm_wait_doms(&di->di_domain->ds_mask);
2494c29d2deSMark Johnston 	if (obj != NULL)
2504c29d2deSMark Johnston 		VM_OBJECT_WLOCK(obj);
2514c29d2deSMark Johnston 	if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
2524c29d2deSMark Johnston 		return (ENOMEM);
2534c29d2deSMark Johnston 
2544c29d2deSMark Johnston 	/* Restart the search. */
2557b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
2567b11a483SJeff Roberson 
2577b11a483SJeff Roberson 	return (0);
2587b11a483SJeff Roberson }
2597b11a483SJeff Roberson 
2604c29d2deSMark Johnston static void
2614c29d2deSMark Johnston _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
2624c29d2deSMark Johnston     int *flags)
2637b11a483SJeff Roberson {
2647b11a483SJeff Roberson 
2657b11a483SJeff Roberson 	di->di_flags = *flags;
2667b11a483SJeff Roberson 	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
2677b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
268463406acSMark Johnston 	if (vm_page_count_min_domain(*domain))
2694c29d2deSMark Johnston 		vm_domainset_iter_policy(di, domain);
2704c29d2deSMark Johnston }
2714c29d2deSMark Johnston 
2724c29d2deSMark Johnston void
2734c29d2deSMark Johnston vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
2744c29d2deSMark Johnston     struct domainset *ds, int *domain, int *flags)
2754c29d2deSMark Johnston {
2764c29d2deSMark Johnston 
2774c29d2deSMark Johnston 	vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
2784c29d2deSMark Johnston 	_vm_domainset_iter_policy_init(di, domain, flags);
2794c29d2deSMark Johnston }
2804c29d2deSMark Johnston 
2814c29d2deSMark Johnston void
2824c29d2deSMark Johnston vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
2834c29d2deSMark Johnston     struct domainset_ref *dr, int *domain, int *flags)
2844c29d2deSMark Johnston {
2854c29d2deSMark Johnston 
2864c29d2deSMark Johnston 	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
2874c29d2deSMark Johnston 	_vm_domainset_iter_policy_init(di, domain, flags);
2887b11a483SJeff Roberson }
2897b11a483SJeff Roberson 
2907b11a483SJeff Roberson int
2914c29d2deSMark Johnston vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
2927b11a483SJeff Roberson {
2937b11a483SJeff Roberson 
2947b11a483SJeff Roberson 	/* If there are more domains to visit we run the iterator. */
29523984ce5SMark Johnston 	while (--di->di_n != 0) {
2967b11a483SJeff Roberson 		vm_domainset_iter_next(di, domain);
297463406acSMark Johnston 		if (!di->di_minskip || !vm_page_count_min_domain(*domain))
29823984ce5SMark Johnston 			return (0);
29923984ce5SMark Johnston 	}
30023984ce5SMark Johnston 
301c56c7299SMark Johnston 	/* If we skipped domains below min restart the search. */
30223984ce5SMark Johnston 	if (di->di_minskip) {
30323984ce5SMark Johnston 		di->di_minskip = false;
30423984ce5SMark Johnston 		vm_domainset_iter_first(di, domain);
3057b11a483SJeff Roberson 		return (0);
3067b11a483SJeff Roberson 	}
3077b11a483SJeff Roberson 
3087b11a483SJeff Roberson 	/* If we visited all domains and this was a NOWAIT we return error. */
3097b11a483SJeff Roberson 	if ((di->di_flags & M_WAITOK) == 0)
3107b11a483SJeff Roberson 		return (ENOMEM);
3117b11a483SJeff Roberson 
3124c29d2deSMark Johnston 	/* Wait for one of the domains to accumulate some free pages. */
3134c29d2deSMark Johnston 	vm_wait_doms(&di->di_domain->ds_mask);
3144c29d2deSMark Johnston 
3154c29d2deSMark Johnston 	/* Restart the search. */
3167b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
3177b11a483SJeff Roberson 
3187b11a483SJeff Roberson 	return (0);
3197b11a483SJeff Roberson }
320b6715dabSJeff Roberson 
321b6715dabSJeff Roberson #else /* !NUMA */
3224c29d2deSMark Johnston 
323b6715dabSJeff Roberson int
3244c29d2deSMark Johnston vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
3254c29d2deSMark Johnston     int *domain)
326b6715dabSJeff Roberson {
327b6715dabSJeff Roberson 
328b6715dabSJeff Roberson 	return (EJUSTRETURN);
329b6715dabSJeff Roberson }
330b6715dabSJeff Roberson 
331b6715dabSJeff Roberson void
3324c29d2deSMark Johnston vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
3334c29d2deSMark Johnston     vm_pindex_t pindex, int *domain, int *flags)
334b6715dabSJeff Roberson {
335b6715dabSJeff Roberson 
336b6715dabSJeff Roberson 	*domain = 0;
337b6715dabSJeff Roberson }
338b6715dabSJeff Roberson 
339b6715dabSJeff Roberson int
3404c29d2deSMark Johnston vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
341b6715dabSJeff Roberson {
342b6715dabSJeff Roberson 
343b6715dabSJeff Roberson 	return (EJUSTRETURN);
344b6715dabSJeff Roberson }
345b6715dabSJeff Roberson 
/*
 * Non-NUMA stub: the only domain is 0.  The iterator, the domainset and the
 * flags are left untouched.
 */
void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{
	(void)di;
	(void)ds;
	(void)flags;

	*domain = 0;
}
353b6715dabSJeff Roberson 
/*
 * Non-NUMA stub: the only domain is 0.  The iterator, the domainset
 * reference and the flags are left untouched.
 */
void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{
	(void)di;
	(void)dr;
	(void)flags;

	*domain = 0;
}
361*17fbf3cfSMark Johnston 
3624c29d2deSMark Johnston #endif /* NUMA */
363