17b11a483SJeff Roberson /*- 27b11a483SJeff Roberson * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 37b11a483SJeff Roberson * 47b11a483SJeff Roberson * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 57b11a483SJeff Roberson * All rights reserved. 67b11a483SJeff Roberson * 77b11a483SJeff Roberson * Redistribution and use in source and binary forms, with or without 87b11a483SJeff Roberson * modification, are permitted provided that the following conditions 97b11a483SJeff Roberson * are met: 107b11a483SJeff Roberson * 1. Redistributions of source code must retain the above copyright 117b11a483SJeff Roberson * notice unmodified, this list of conditions, and the following 127b11a483SJeff Roberson * disclaimer. 137b11a483SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 147b11a483SJeff Roberson * notice, this list of conditions and the following disclaimer in the 157b11a483SJeff Roberson * documentation and/or other materials provided with the distribution. 167b11a483SJeff Roberson * 177b11a483SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 187b11a483SJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 197b11a483SJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 207b11a483SJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 217b11a483SJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 227b11a483SJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 237b11a483SJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 247b11a483SJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 257b11a483SJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 267b11a483SJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 277b11a483SJeff Roberson * 287b11a483SJeff Roberson */ 297b11a483SJeff Roberson 307b11a483SJeff Roberson #include <sys/cdefs.h> 317b11a483SJeff Roberson __FBSDID("$FreeBSD$"); 327b11a483SJeff Roberson 337b11a483SJeff Roberson #include "opt_vm.h" 347b11a483SJeff Roberson 357b11a483SJeff Roberson #include <sys/param.h> 367b11a483SJeff Roberson #include <sys/systm.h> 377b11a483SJeff Roberson #include <sys/bitset.h> 387b11a483SJeff Roberson #include <sys/domainset.h> 397b11a483SJeff Roberson #include <sys/proc.h> 407b11a483SJeff Roberson #include <sys/lock.h> 417b11a483SJeff Roberson #include <sys/mutex.h> 427b11a483SJeff Roberson #include <sys/malloc.h> 437b11a483SJeff Roberson #include <sys/vmmeter.h> 447b11a483SJeff Roberson 457b11a483SJeff Roberson #include <vm/vm.h> 467b11a483SJeff Roberson #include <vm/vm_param.h> 477b11a483SJeff Roberson #include <vm/vm_domainset.h> 487b11a483SJeff Roberson #include <vm/vm_object.h> 497b11a483SJeff Roberson #include <vm/vm_page.h> 507b11a483SJeff Roberson #include <vm/vm_phys.h> 517b11a483SJeff Roberson 52b6715dabSJeff Roberson #ifdef NUMA 537b11a483SJeff Roberson /* 547b11a483SJeff Roberson * Iterators are written such that the first nowait pass has as short a 557b11a483SJeff Roberson * codepath as possible to eliminate bloat from the allocator. It is 567b11a483SJeff Roberson * assumed that most allocations are successful. 577b11a483SJeff Roberson */ 587b11a483SJeff Roberson 59*e5818a53SJeff Roberson static int vm_domainset_default_stride = 64; 60*e5818a53SJeff Roberson 617b11a483SJeff Roberson /* 627b11a483SJeff Roberson * Determine which policy is to be used for this allocation. 637b11a483SJeff Roberson */ 647b11a483SJeff Roberson static void 65*e5818a53SJeff Roberson vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66*e5818a53SJeff Roberson vm_pindex_t pindex) 677b11a483SJeff Roberson { 687b11a483SJeff Roberson struct domainset *domain; 697b11a483SJeff Roberson 707b11a483SJeff Roberson /* 717b11a483SJeff Roberson * object policy takes precedence over thread policy. The policies 727b11a483SJeff Roberson * are immutable and unsynchronized. Updates can race but pointer 737b11a483SJeff Roberson * loads are assumed to be atomic. 747b11a483SJeff Roberson */ 757b11a483SJeff Roberson if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 767b11a483SJeff Roberson di->di_domain = domain; 777b11a483SJeff Roberson di->di_iter = &obj->domain.dr_iterator; 787b11a483SJeff Roberson } else { 797b11a483SJeff Roberson di->di_domain = curthread->td_domain.dr_policy; 807b11a483SJeff Roberson di->di_iter = &curthread->td_domain.dr_iterator; 817b11a483SJeff Roberson } 82*e5818a53SJeff Roberson di->di_policy = di->di_domain->ds_policy; 83*e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 84*e5818a53SJeff Roberson if (vm_object_reserv(obj)) { 85*e5818a53SJeff Roberson /* 86*e5818a53SJeff Roberson * Color the pindex so we end up on the correct 87*e5818a53SJeff Roberson * reservation boundary. 88*e5818a53SJeff Roberson */ 89*e5818a53SJeff Roberson pindex += obj->pg_color; 90*e5818a53SJeff Roberson pindex >>= VM_LEVEL_0_ORDER; 91*e5818a53SJeff Roberson } else 92*e5818a53SJeff Roberson pindex /= vm_domainset_default_stride; 93*e5818a53SJeff Roberson /* 94*e5818a53SJeff Roberson * Offset pindex so the first page of each object does 95*e5818a53SJeff Roberson * not end up in domain 0. 96*e5818a53SJeff Roberson */ 97*e5818a53SJeff Roberson if (obj != NULL) 98*e5818a53SJeff Roberson pindex += (((uintptr_t)obj) / sizeof(*obj)); 99*e5818a53SJeff Roberson di->di_offset = pindex; 100*e5818a53SJeff Roberson } 1017b11a483SJeff Roberson } 1027b11a483SJeff Roberson 1037b11a483SJeff Roberson static void 1047b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 1057b11a483SJeff Roberson { 1067b11a483SJeff Roberson 107*e5818a53SJeff Roberson *domain = di->di_domain->ds_order[ 108*e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 1097b11a483SJeff Roberson } 1107b11a483SJeff Roberson 1117b11a483SJeff Roberson static void 1127b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 1137b11a483SJeff Roberson { 1147b11a483SJeff Roberson int d; 1157b11a483SJeff Roberson 1167b11a483SJeff Roberson do { 117*e5818a53SJeff Roberson d = di->di_domain->ds_order[ 118*e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 119*e5818a53SJeff Roberson } while (d == di->di_domain->ds_prefer); 120*e5818a53SJeff Roberson *domain = d; 121*e5818a53SJeff Roberson } 122*e5818a53SJeff Roberson 123*e5818a53SJeff Roberson static void 124*e5818a53SJeff Roberson vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 125*e5818a53SJeff Roberson { 126*e5818a53SJeff Roberson int d; 127*e5818a53SJeff Roberson 128*e5818a53SJeff Roberson d = di->di_offset % di->di_domain->ds_cnt; 129*e5818a53SJeff Roberson *di->di_iter = d; 130*e5818a53SJeff Roberson *domain = di->di_domain->ds_order[d]; 1317b11a483SJeff Roberson } 1327b11a483SJeff Roberson 1337b11a483SJeff Roberson static void 1347b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 1357b11a483SJeff Roberson { 1367b11a483SJeff Roberson 1377b11a483SJeff Roberson KASSERT(di->di_n > 0, 1387b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 139*e5818a53SJeff Roberson switch (di->di_policy) { 1407b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1417b11a483SJeff Roberson /* 1427b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1437b11a483SJeff Roberson * first-touch to round-robin. 1447b11a483SJeff Roberson */ 1457b11a483SJeff Roberson /* FALLTHROUGH */ 146*e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 147*e5818a53SJeff Roberson /* FALLTHROUGH */ 1487b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1497b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1507b11a483SJeff Roberson break; 1517b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1527b11a483SJeff Roberson vm_domainset_iter_prefer(di, domain); 1537b11a483SJeff Roberson break; 1547b11a483SJeff Roberson default: 1557b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 156*e5818a53SJeff Roberson di->di_policy); 1577b11a483SJeff Roberson } 1587b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 1597b11a483SJeff Roberson ("vm_domainset_iter_next: Invalid domain %d", *domain)); 1607b11a483SJeff Roberson } 1617b11a483SJeff Roberson 1627b11a483SJeff Roberson static void 1637b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 1647b11a483SJeff Roberson { 1657b11a483SJeff Roberson 166*e5818a53SJeff Roberson switch (di->di_policy) { 1677b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1687b11a483SJeff Roberson *domain = PCPU_GET(domain); 1697b11a483SJeff Roberson if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 170*e5818a53SJeff Roberson /* 171*e5818a53SJeff Roberson * Add an extra iteration because we will visit the 172*e5818a53SJeff Roberson * current domain a second time in the rr iterator. 173*e5818a53SJeff Roberson */ 174*e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt + 1; 1757b11a483SJeff Roberson break; 1767b11a483SJeff Roberson } 1777b11a483SJeff Roberson /* 1787b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1797b11a483SJeff Roberson * first-touch to round-robin. 1807b11a483SJeff Roberson */ 1817b11a483SJeff Roberson /* FALLTHROUGH */ 1827b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1837b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1847b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1857b11a483SJeff Roberson break; 1867b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1877b11a483SJeff Roberson *domain = di->di_domain->ds_prefer; 1887b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1897b11a483SJeff Roberson break; 190*e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 191*e5818a53SJeff Roberson vm_domainset_iter_interleave(di, domain); 192*e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt; 193*e5818a53SJeff Roberson break; 1947b11a483SJeff Roberson default: 1957b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 196*e5818a53SJeff Roberson di->di_policy); 1977b11a483SJeff Roberson } 1987b11a483SJeff Roberson KASSERT(di->di_n > 0, 1997b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 2007b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 2017b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid domain %d", *domain)); 2027b11a483SJeff Roberson } 2037b11a483SJeff Roberson 2047b11a483SJeff Roberson void 2057b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 206*e5818a53SJeff Roberson vm_pindex_t pindex, int *domain, int *req) 2077b11a483SJeff Roberson { 2087b11a483SJeff Roberson 209*e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, pindex); 2107b11a483SJeff Roberson di->di_flags = *req; 2117b11a483SJeff Roberson *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 2127b11a483SJeff Roberson VM_ALLOC_NOWAIT; 2137b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2147b11a483SJeff Roberson } 2157b11a483SJeff Roberson 2167b11a483SJeff Roberson int 2177b11a483SJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 2187b11a483SJeff Roberson { 2197b11a483SJeff Roberson 2207b11a483SJeff Roberson /* 2217b11a483SJeff Roberson * If we exhausted all options with NOWAIT and did a WAITFAIL it 2227b11a483SJeff Roberson * is time to return an error to the caller. 2237b11a483SJeff Roberson */ 2247b11a483SJeff Roberson if ((*req & VM_ALLOC_WAITFAIL) != 0) 2257b11a483SJeff Roberson return (ENOMEM); 2267b11a483SJeff Roberson 2277b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 2287b11a483SJeff Roberson if (--di->di_n != 0) { 2297b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 2307b11a483SJeff Roberson return (0); 2317b11a483SJeff Roberson } 2327b11a483SJeff Roberson 2337b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2347b11a483SJeff Roberson if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 2357b11a483SJeff Roberson return (ENOMEM); 2367b11a483SJeff Roberson 2377b11a483SJeff Roberson /* 2387b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2397b11a483SJeff Roberson * from the beginning with a blocking allocation. 2407b11a483SJeff Roberson */ 2417b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2427b11a483SJeff Roberson *req = di->di_flags; 2437b11a483SJeff Roberson 2447b11a483SJeff Roberson return (0); 2457b11a483SJeff Roberson } 2467b11a483SJeff Roberson 2477b11a483SJeff Roberson 2487b11a483SJeff Roberson void 2497b11a483SJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 2507b11a483SJeff Roberson struct vm_object *obj, int *domain, int *flags) 2517b11a483SJeff Roberson { 2527b11a483SJeff Roberson 253*e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, 0); 254*e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 255*e5818a53SJeff Roberson di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 2567b11a483SJeff Roberson di->di_flags = *flags; 2577b11a483SJeff Roberson *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 2587b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2597b11a483SJeff Roberson } 2607b11a483SJeff Roberson 2617b11a483SJeff Roberson int 2627b11a483SJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 2637b11a483SJeff Roberson { 2647b11a483SJeff Roberson 2657b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 2667b11a483SJeff Roberson if (--di->di_n != 0) { 2677b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 2687b11a483SJeff Roberson return (0); 2697b11a483SJeff Roberson } 2707b11a483SJeff Roberson 2717b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2727b11a483SJeff Roberson if ((di->di_flags & M_WAITOK) == 0) 2737b11a483SJeff Roberson return (ENOMEM); 2747b11a483SJeff Roberson 2757b11a483SJeff Roberson /* 2767b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2777b11a483SJeff Roberson * from the beginning with a blocking allocation. 2787b11a483SJeff Roberson */ 2797b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2807b11a483SJeff Roberson *flags = di->di_flags; 2817b11a483SJeff Roberson 2827b11a483SJeff Roberson return (0); 2837b11a483SJeff Roberson } 284b6715dabSJeff Roberson 285b6715dabSJeff Roberson #else /* !NUMA */ 286b6715dabSJeff Roberson int 287b6715dabSJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 288b6715dabSJeff Roberson { 289b6715dabSJeff Roberson 290b6715dabSJeff Roberson return (EJUSTRETURN); 291b6715dabSJeff Roberson } 292b6715dabSJeff Roberson 293b6715dabSJeff Roberson void 294b6715dabSJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, 295*e5818a53SJeff Roberson struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 296b6715dabSJeff Roberson { 297b6715dabSJeff Roberson 298b6715dabSJeff Roberson *domain = 0; 299b6715dabSJeff Roberson } 300b6715dabSJeff Roberson 301b6715dabSJeff Roberson int 302b6715dabSJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 303b6715dabSJeff Roberson { 304b6715dabSJeff Roberson 305b6715dabSJeff Roberson return (EJUSTRETURN); 306b6715dabSJeff Roberson } 307b6715dabSJeff Roberson 308b6715dabSJeff Roberson void 309b6715dabSJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 310b6715dabSJeff Roberson struct vm_object *obj, int *domain, int *flags) 311b6715dabSJeff Roberson { 312b6715dabSJeff Roberson 313b6715dabSJeff Roberson *domain = 0; 314b6715dabSJeff Roberson } 315b6715dabSJeff Roberson 316b6715dabSJeff Roberson #endif 317