xref: /freebsd/sys/vm/vm_domainset.c (revision e5818a53dbd212809059bb306775a4b7e0e30c5f)
17b11a483SJeff Roberson /*-
27b11a483SJeff Roberson  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
37b11a483SJeff Roberson  *
47b11a483SJeff Roberson  * Copyright (c) 2017,	Jeffrey Roberson <jeff@freebsd.org>
57b11a483SJeff Roberson  * All rights reserved.
67b11a483SJeff Roberson  *
77b11a483SJeff Roberson  * Redistribution and use in source and binary forms, with or without
87b11a483SJeff Roberson  * modification, are permitted provided that the following conditions
97b11a483SJeff Roberson  * are met:
107b11a483SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
117b11a483SJeff Roberson  *    notice unmodified, this list of conditions, and the following
127b11a483SJeff Roberson  *    disclaimer.
137b11a483SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
147b11a483SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
157b11a483SJeff Roberson  *    documentation and/or other materials provided with the distribution.
167b11a483SJeff Roberson  *
177b11a483SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
187b11a483SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
197b11a483SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
207b11a483SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
217b11a483SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
227b11a483SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
237b11a483SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
247b11a483SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
257b11a483SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
267b11a483SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
277b11a483SJeff Roberson  *
287b11a483SJeff Roberson  */
297b11a483SJeff Roberson 
307b11a483SJeff Roberson #include <sys/cdefs.h>
317b11a483SJeff Roberson __FBSDID("$FreeBSD$");
327b11a483SJeff Roberson 
337b11a483SJeff Roberson #include "opt_vm.h"
347b11a483SJeff Roberson 
357b11a483SJeff Roberson #include <sys/param.h>
367b11a483SJeff Roberson #include <sys/systm.h>
377b11a483SJeff Roberson #include <sys/bitset.h>
387b11a483SJeff Roberson #include <sys/domainset.h>
397b11a483SJeff Roberson #include <sys/proc.h>
407b11a483SJeff Roberson #include <sys/lock.h>
417b11a483SJeff Roberson #include <sys/mutex.h>
427b11a483SJeff Roberson #include <sys/malloc.h>
437b11a483SJeff Roberson #include <sys/vmmeter.h>
447b11a483SJeff Roberson 
457b11a483SJeff Roberson #include <vm/vm.h>
467b11a483SJeff Roberson #include <vm/vm_param.h>
477b11a483SJeff Roberson #include <vm/vm_domainset.h>
487b11a483SJeff Roberson #include <vm/vm_object.h>
497b11a483SJeff Roberson #include <vm/vm_page.h>
507b11a483SJeff Roberson #include <vm/vm_phys.h>
517b11a483SJeff Roberson 
52b6715dabSJeff Roberson #ifdef NUMA
537b11a483SJeff Roberson /*
547b11a483SJeff Roberson  * Iterators are written such that the first nowait pass has as short a
557b11a483SJeff Roberson  * codepath as possible to eliminate bloat from the allocator.  It is
567b11a483SJeff Roberson  * assumed that most allocations are successful.
577b11a483SJeff Roberson  */
587b11a483SJeff Roberson 
/*
 * Number of consecutive page indices placed in one domain by the interleave
 * policy when the object does not use reservations.
 */
static int vm_domainset_default_stride = 64;
60*e5818a53SJeff Roberson 
617b11a483SJeff Roberson /*
627b11a483SJeff Roberson  * Determine which policy is to be used for this allocation.
637b11a483SJeff Roberson  */
647b11a483SJeff Roberson static void
65*e5818a53SJeff Roberson vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
66*e5818a53SJeff Roberson     vm_pindex_t pindex)
677b11a483SJeff Roberson {
687b11a483SJeff Roberson 	struct domainset *domain;
697b11a483SJeff Roberson 
707b11a483SJeff Roberson 	/*
717b11a483SJeff Roberson 	 * object policy takes precedence over thread policy.  The policies
727b11a483SJeff Roberson 	 * are immutable and unsynchronized.  Updates can race but pointer
737b11a483SJeff Roberson 	 * loads are assumed to be atomic.
747b11a483SJeff Roberson 	 */
757b11a483SJeff Roberson 	if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
767b11a483SJeff Roberson 		di->di_domain = domain;
777b11a483SJeff Roberson 		di->di_iter = &obj->domain.dr_iterator;
787b11a483SJeff Roberson 	} else {
797b11a483SJeff Roberson 		di->di_domain = curthread->td_domain.dr_policy;
807b11a483SJeff Roberson 		di->di_iter = &curthread->td_domain.dr_iterator;
817b11a483SJeff Roberson 	}
82*e5818a53SJeff Roberson 	di->di_policy = di->di_domain->ds_policy;
83*e5818a53SJeff Roberson 	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
84*e5818a53SJeff Roberson 		if (vm_object_reserv(obj)) {
85*e5818a53SJeff Roberson 			/*
86*e5818a53SJeff Roberson 			 * Color the pindex so we end up on the correct
87*e5818a53SJeff Roberson 			 * reservation boundary.
88*e5818a53SJeff Roberson 			 */
89*e5818a53SJeff Roberson 			pindex += obj->pg_color;
90*e5818a53SJeff Roberson 			pindex >>= VM_LEVEL_0_ORDER;
91*e5818a53SJeff Roberson 		} else
92*e5818a53SJeff Roberson 			pindex /= vm_domainset_default_stride;
93*e5818a53SJeff Roberson 		/*
94*e5818a53SJeff Roberson 		 * Offset pindex so the first page of each object does
95*e5818a53SJeff Roberson 		 * not end up in domain 0.
96*e5818a53SJeff Roberson 		 */
97*e5818a53SJeff Roberson 		if (obj != NULL)
98*e5818a53SJeff Roberson 			pindex += (((uintptr_t)obj) / sizeof(*obj));
99*e5818a53SJeff Roberson 		di->di_offset = pindex;
100*e5818a53SJeff Roberson 	}
1017b11a483SJeff Roberson }
1027b11a483SJeff Roberson 
1037b11a483SJeff Roberson static void
1047b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
1057b11a483SJeff Roberson {
1067b11a483SJeff Roberson 
107*e5818a53SJeff Roberson 	*domain = di->di_domain->ds_order[
108*e5818a53SJeff Roberson 	    ++(*di->di_iter) % di->di_domain->ds_cnt];
1097b11a483SJeff Roberson }
1107b11a483SJeff Roberson 
1117b11a483SJeff Roberson static void
1127b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
1137b11a483SJeff Roberson {
1147b11a483SJeff Roberson 	int d;
1157b11a483SJeff Roberson 
1167b11a483SJeff Roberson 	do {
117*e5818a53SJeff Roberson 		d = di->di_domain->ds_order[
118*e5818a53SJeff Roberson 		    ++(*di->di_iter) % di->di_domain->ds_cnt];
119*e5818a53SJeff Roberson 	} while (d == di->di_domain->ds_prefer);
120*e5818a53SJeff Roberson 	*domain = d;
121*e5818a53SJeff Roberson }
122*e5818a53SJeff Roberson 
123*e5818a53SJeff Roberson static void
124*e5818a53SJeff Roberson vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
125*e5818a53SJeff Roberson {
126*e5818a53SJeff Roberson 	int d;
127*e5818a53SJeff Roberson 
128*e5818a53SJeff Roberson 	d = di->di_offset % di->di_domain->ds_cnt;
129*e5818a53SJeff Roberson 	*di->di_iter = d;
130*e5818a53SJeff Roberson 	*domain = di->di_domain->ds_order[d];
1317b11a483SJeff Roberson }
1327b11a483SJeff Roberson 
1337b11a483SJeff Roberson static void
1347b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
1357b11a483SJeff Roberson {
1367b11a483SJeff Roberson 
1377b11a483SJeff Roberson 	KASSERT(di->di_n > 0,
1387b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
139*e5818a53SJeff Roberson 	switch (di->di_policy) {
1407b11a483SJeff Roberson 	case DOMAINSET_POLICY_FIRSTTOUCH:
1417b11a483SJeff Roberson 		/*
1427b11a483SJeff Roberson 		 * To prevent impossible allocations we convert an invalid
1437b11a483SJeff Roberson 		 * first-touch to round-robin.
1447b11a483SJeff Roberson 		 */
1457b11a483SJeff Roberson 		/* FALLTHROUGH */
146*e5818a53SJeff Roberson 	case DOMAINSET_POLICY_INTERLEAVE:
147*e5818a53SJeff Roberson 		/* FALLTHROUGH */
1487b11a483SJeff Roberson 	case DOMAINSET_POLICY_ROUNDROBIN:
1497b11a483SJeff Roberson 		vm_domainset_iter_rr(di, domain);
1507b11a483SJeff Roberson 		break;
1517b11a483SJeff Roberson 	case DOMAINSET_POLICY_PREFER:
1527b11a483SJeff Roberson 		vm_domainset_iter_prefer(di, domain);
1537b11a483SJeff Roberson 		break;
1547b11a483SJeff Roberson 	default:
1557b11a483SJeff Roberson 		panic("vm_domainset_iter_first: Unknown policy %d",
156*e5818a53SJeff Roberson 		    di->di_policy);
1577b11a483SJeff Roberson 	}
1587b11a483SJeff Roberson 	KASSERT(*domain < vm_ndomains,
1597b11a483SJeff Roberson 	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
1607b11a483SJeff Roberson }
1617b11a483SJeff Roberson 
1627b11a483SJeff Roberson static void
1637b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
1647b11a483SJeff Roberson {
1657b11a483SJeff Roberson 
166*e5818a53SJeff Roberson 	switch (di->di_policy) {
1677b11a483SJeff Roberson 	case DOMAINSET_POLICY_FIRSTTOUCH:
1687b11a483SJeff Roberson 		*domain = PCPU_GET(domain);
1697b11a483SJeff Roberson 		if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
170*e5818a53SJeff Roberson 			/*
171*e5818a53SJeff Roberson 			 * Add an extra iteration because we will visit the
172*e5818a53SJeff Roberson 			 * current domain a second time in the rr iterator.
173*e5818a53SJeff Roberson 			 */
174*e5818a53SJeff Roberson 			di->di_n = di->di_domain->ds_cnt + 1;
1757b11a483SJeff Roberson 			break;
1767b11a483SJeff Roberson 		}
1777b11a483SJeff Roberson 		/*
1787b11a483SJeff Roberson 		 * To prevent impossible allocations we convert an invalid
1797b11a483SJeff Roberson 		 * first-touch to round-robin.
1807b11a483SJeff Roberson 		 */
1817b11a483SJeff Roberson 		/* FALLTHROUGH */
1827b11a483SJeff Roberson 	case DOMAINSET_POLICY_ROUNDROBIN:
1837b11a483SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
1847b11a483SJeff Roberson 		vm_domainset_iter_rr(di, domain);
1857b11a483SJeff Roberson 		break;
1867b11a483SJeff Roberson 	case DOMAINSET_POLICY_PREFER:
1877b11a483SJeff Roberson 		*domain = di->di_domain->ds_prefer;
1887b11a483SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
1897b11a483SJeff Roberson 		break;
190*e5818a53SJeff Roberson 	case DOMAINSET_POLICY_INTERLEAVE:
191*e5818a53SJeff Roberson 		vm_domainset_iter_interleave(di, domain);
192*e5818a53SJeff Roberson 		di->di_n = di->di_domain->ds_cnt;
193*e5818a53SJeff Roberson 		break;
1947b11a483SJeff Roberson 	default:
1957b11a483SJeff Roberson 		panic("vm_domainset_iter_first: Unknown policy %d",
196*e5818a53SJeff Roberson 		    di->di_policy);
1977b11a483SJeff Roberson 	}
1987b11a483SJeff Roberson 	KASSERT(di->di_n > 0,
1997b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
2007b11a483SJeff Roberson 	KASSERT(*domain < vm_ndomains,
2017b11a483SJeff Roberson 	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
2027b11a483SJeff Roberson }
2037b11a483SJeff Roberson 
2047b11a483SJeff Roberson void
2057b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
206*e5818a53SJeff Roberson     vm_pindex_t pindex, int *domain, int *req)
2077b11a483SJeff Roberson {
2087b11a483SJeff Roberson 
209*e5818a53SJeff Roberson 	vm_domainset_iter_init(di, obj, pindex);
2107b11a483SJeff Roberson 	di->di_flags = *req;
2117b11a483SJeff Roberson 	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
2127b11a483SJeff Roberson 	    VM_ALLOC_NOWAIT;
2137b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
2147b11a483SJeff Roberson }
2157b11a483SJeff Roberson 
2167b11a483SJeff Roberson int
2177b11a483SJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
2187b11a483SJeff Roberson {
2197b11a483SJeff Roberson 
2207b11a483SJeff Roberson 	/*
2217b11a483SJeff Roberson 	 * If we exhausted all options with NOWAIT and did a WAITFAIL it
2227b11a483SJeff Roberson 	 * is time to return an error to the caller.
2237b11a483SJeff Roberson 	 */
2247b11a483SJeff Roberson 	if ((*req & VM_ALLOC_WAITFAIL) != 0)
2257b11a483SJeff Roberson 		return (ENOMEM);
2267b11a483SJeff Roberson 
2277b11a483SJeff Roberson 	/* If there are more domains to visit we run the iterator. */
2287b11a483SJeff Roberson 	if (--di->di_n != 0) {
2297b11a483SJeff Roberson 		vm_domainset_iter_next(di, domain);
2307b11a483SJeff Roberson 		return (0);
2317b11a483SJeff Roberson 	}
2327b11a483SJeff Roberson 
2337b11a483SJeff Roberson 	/* If we visited all domains and this was a NOWAIT we return error. */
2347b11a483SJeff Roberson 	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
2357b11a483SJeff Roberson 		return (ENOMEM);
2367b11a483SJeff Roberson 
2377b11a483SJeff Roberson 	/*
2387b11a483SJeff Roberson 	 * We have visited all domains with non-blocking allocations, try
2397b11a483SJeff Roberson 	 * from the beginning with a blocking allocation.
2407b11a483SJeff Roberson 	 */
2417b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
2427b11a483SJeff Roberson 	*req = di->di_flags;
2437b11a483SJeff Roberson 
2447b11a483SJeff Roberson 	return (0);
2457b11a483SJeff Roberson }
2467b11a483SJeff Roberson 
2477b11a483SJeff Roberson 
2487b11a483SJeff Roberson void
2497b11a483SJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
2507b11a483SJeff Roberson     struct vm_object *obj, int *domain, int *flags)
2517b11a483SJeff Roberson {
2527b11a483SJeff Roberson 
253*e5818a53SJeff Roberson 	vm_domainset_iter_init(di, obj, 0);
254*e5818a53SJeff Roberson 	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE)
255*e5818a53SJeff Roberson 		di->di_policy = DOMAINSET_POLICY_ROUNDROBIN;
2567b11a483SJeff Roberson 	di->di_flags = *flags;
2577b11a483SJeff Roberson 	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
2587b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
2597b11a483SJeff Roberson }
2607b11a483SJeff Roberson 
2617b11a483SJeff Roberson int
2627b11a483SJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
2637b11a483SJeff Roberson {
2647b11a483SJeff Roberson 
2657b11a483SJeff Roberson 	/* If there are more domains to visit we run the iterator. */
2667b11a483SJeff Roberson 	if (--di->di_n != 0) {
2677b11a483SJeff Roberson 		vm_domainset_iter_next(di, domain);
2687b11a483SJeff Roberson 		return (0);
2697b11a483SJeff Roberson 	}
2707b11a483SJeff Roberson 
2717b11a483SJeff Roberson 	/* If we visited all domains and this was a NOWAIT we return error. */
2727b11a483SJeff Roberson 	if ((di->di_flags & M_WAITOK) == 0)
2737b11a483SJeff Roberson 		return (ENOMEM);
2747b11a483SJeff Roberson 
2757b11a483SJeff Roberson 	/*
2767b11a483SJeff Roberson 	 * We have visited all domains with non-blocking allocations, try
2777b11a483SJeff Roberson 	 * from the beginning with a blocking allocation.
2787b11a483SJeff Roberson 	 */
2797b11a483SJeff Roberson 	vm_domainset_iter_first(di, domain);
2807b11a483SJeff Roberson 	*flags = di->di_flags;
2817b11a483SJeff Roberson 
2827b11a483SJeff Roberson 	return (0);
2837b11a483SJeff Roberson }
284b6715dabSJeff Roberson 
285b6715dabSJeff Roberson #else /* !NUMA */
286b6715dabSJeff Roberson int
287b6715dabSJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags)
288b6715dabSJeff Roberson {
289b6715dabSJeff Roberson 
290b6715dabSJeff Roberson 	return (EJUSTRETURN);
291b6715dabSJeff Roberson }
292b6715dabSJeff Roberson 
293b6715dabSJeff Roberson void
294b6715dabSJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di,
295*e5818a53SJeff Roberson             struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags)
296b6715dabSJeff Roberson {
297b6715dabSJeff Roberson 
298b6715dabSJeff Roberson 	*domain = 0;
299b6715dabSJeff Roberson }
300b6715dabSJeff Roberson 
301b6715dabSJeff Roberson int
302b6715dabSJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
303b6715dabSJeff Roberson {
304b6715dabSJeff Roberson 
305b6715dabSJeff Roberson 	return (EJUSTRETURN);
306b6715dabSJeff Roberson }
307b6715dabSJeff Roberson 
/*
 * Non-NUMA stub: always selects domain 0.  The iterator, object and flags
 * are left untouched.
 */
void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
    struct vm_object *obj, int *domain, int *flags)
{

	*domain = 0;
}
315b6715dabSJeff Roberson 
316b6715dabSJeff Roberson #endif
317