17b11a483SJeff Roberson /*- 27b11a483SJeff Roberson * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 37b11a483SJeff Roberson * 47b11a483SJeff Roberson * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 57b11a483SJeff Roberson * All rights reserved. 67b11a483SJeff Roberson * 77b11a483SJeff Roberson * Redistribution and use in source and binary forms, with or without 87b11a483SJeff Roberson * modification, are permitted provided that the following conditions 97b11a483SJeff Roberson * are met: 107b11a483SJeff Roberson * 1. Redistributions of source code must retain the above copyright 117b11a483SJeff Roberson * notice unmodified, this list of conditions, and the following 127b11a483SJeff Roberson * disclaimer. 137b11a483SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 147b11a483SJeff Roberson * notice, this list of conditions and the following disclaimer in the 157b11a483SJeff Roberson * documentation and/or other materials provided with the distribution. 167b11a483SJeff Roberson * 177b11a483SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 187b11a483SJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 197b11a483SJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 207b11a483SJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 217b11a483SJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 227b11a483SJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 237b11a483SJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 247b11a483SJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 257b11a483SJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 267b11a483SJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 277b11a483SJeff Roberson * 287b11a483SJeff Roberson */ 297b11a483SJeff Roberson 307b11a483SJeff Roberson #include <sys/cdefs.h> 317b11a483SJeff Roberson __FBSDID("$FreeBSD$"); 327b11a483SJeff Roberson 337b11a483SJeff Roberson #include "opt_vm.h" 347b11a483SJeff Roberson 357b11a483SJeff Roberson #include <sys/param.h> 367b11a483SJeff Roberson #include <sys/systm.h> 377b11a483SJeff Roberson #include <sys/bitset.h> 387b11a483SJeff Roberson #include <sys/domainset.h> 397b11a483SJeff Roberson #include <sys/proc.h> 407b11a483SJeff Roberson #include <sys/lock.h> 417b11a483SJeff Roberson #include <sys/mutex.h> 427b11a483SJeff Roberson #include <sys/malloc.h> 437b11a483SJeff Roberson #include <sys/vmmeter.h> 447b11a483SJeff Roberson 457b11a483SJeff Roberson #include <vm/vm.h> 467b11a483SJeff Roberson #include <vm/vm_param.h> 477b11a483SJeff Roberson #include <vm/vm_domainset.h> 487b11a483SJeff Roberson #include <vm/vm_object.h> 497b11a483SJeff Roberson #include <vm/vm_page.h> 507b11a483SJeff Roberson #include <vm/vm_phys.h> 517b11a483SJeff Roberson 52*b6715dabSJeff Roberson #ifdef NUMA 537b11a483SJeff Roberson /* 547b11a483SJeff Roberson * Iterators are written such that the first nowait pass has as short a 557b11a483SJeff Roberson * codepath as possible to eliminate bloat from the allocator. It is 567b11a483SJeff Roberson * assumed that most allocations are successful. 577b11a483SJeff Roberson */ 587b11a483SJeff Roberson 597b11a483SJeff Roberson /* 607b11a483SJeff Roberson * Determine which policy is to be used for this allocation. 617b11a483SJeff Roberson */ 627b11a483SJeff Roberson static void 637b11a483SJeff Roberson vm_domainset_iter_domain(struct vm_domainset_iter *di, struct vm_object *obj) 647b11a483SJeff Roberson { 657b11a483SJeff Roberson struct domainset *domain; 667b11a483SJeff Roberson 677b11a483SJeff Roberson /* 687b11a483SJeff Roberson * object policy takes precedence over thread policy. The policies 697b11a483SJeff Roberson * are immutable and unsynchronized. Updates can race but pointer 707b11a483SJeff Roberson * loads are assumed to be atomic. 717b11a483SJeff Roberson */ 727b11a483SJeff Roberson if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 737b11a483SJeff Roberson di->di_domain = domain; 747b11a483SJeff Roberson di->di_iter = &obj->domain.dr_iterator; 757b11a483SJeff Roberson } else { 767b11a483SJeff Roberson di->di_domain = curthread->td_domain.dr_policy; 777b11a483SJeff Roberson di->di_iter = &curthread->td_domain.dr_iterator; 787b11a483SJeff Roberson } 797b11a483SJeff Roberson } 807b11a483SJeff Roberson 817b11a483SJeff Roberson static void 827b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 837b11a483SJeff Roberson { 847b11a483SJeff Roberson int d; 857b11a483SJeff Roberson 867b11a483SJeff Roberson d = *di->di_iter; 877b11a483SJeff Roberson do { 887b11a483SJeff Roberson d = (d + 1) % di->di_domain->ds_max; 897b11a483SJeff Roberson } while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask)); 907b11a483SJeff Roberson *di->di_iter = *domain = d; 917b11a483SJeff Roberson } 927b11a483SJeff Roberson 937b11a483SJeff Roberson static void 947b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 957b11a483SJeff Roberson { 967b11a483SJeff Roberson int d; 977b11a483SJeff Roberson 987b11a483SJeff Roberson d = *di->di_iter; 997b11a483SJeff Roberson do { 1007b11a483SJeff Roberson d = (d + 1) % di->di_domain->ds_max; 1017b11a483SJeff Roberson } while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask) || 1027b11a483SJeff Roberson d == di->di_domain->ds_prefer); 1037b11a483SJeff Roberson *di->di_iter = *domain = d; 1047b11a483SJeff Roberson } 1057b11a483SJeff Roberson 1067b11a483SJeff Roberson static void 1077b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 1087b11a483SJeff Roberson { 1097b11a483SJeff Roberson 1107b11a483SJeff Roberson KASSERT(di->di_n > 0, 1117b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 1127b11a483SJeff Roberson switch (di->di_domain->ds_policy) { 1137b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1147b11a483SJeff Roberson /* 1157b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1167b11a483SJeff Roberson * first-touch to round-robin. 1177b11a483SJeff Roberson */ 1187b11a483SJeff Roberson /* FALLTHROUGH */ 1197b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1207b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1217b11a483SJeff Roberson break; 1227b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1237b11a483SJeff Roberson vm_domainset_iter_prefer(di, domain); 1247b11a483SJeff Roberson break; 1257b11a483SJeff Roberson default: 1267b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 1277b11a483SJeff Roberson di->di_domain->ds_policy); 1287b11a483SJeff Roberson } 1297b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 1307b11a483SJeff Roberson ("vm_domainset_iter_next: Invalid domain %d", *domain)); 1317b11a483SJeff Roberson } 1327b11a483SJeff Roberson 1337b11a483SJeff Roberson static void 1347b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 1357b11a483SJeff Roberson { 1367b11a483SJeff Roberson 1377b11a483SJeff Roberson switch (di->di_domain->ds_policy) { 1387b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1397b11a483SJeff Roberson *domain = PCPU_GET(domain); 1407b11a483SJeff Roberson if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 1417b11a483SJeff Roberson di->di_n = 1; 1427b11a483SJeff Roberson break; 1437b11a483SJeff Roberson } 1447b11a483SJeff Roberson /* 1457b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1467b11a483SJeff Roberson * first-touch to round-robin. 1477b11a483SJeff Roberson */ 1487b11a483SJeff Roberson /* FALLTHROUGH */ 1497b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1507b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1517b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1527b11a483SJeff Roberson break; 1537b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1547b11a483SJeff Roberson *domain = di->di_domain->ds_prefer; 1557b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1567b11a483SJeff Roberson break; 1577b11a483SJeff Roberson default: 1587b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 1597b11a483SJeff Roberson di->di_domain->ds_policy); 1607b11a483SJeff Roberson } 1617b11a483SJeff Roberson KASSERT(di->di_n > 0, 1627b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 1637b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 1647b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid domain %d", *domain)); 1657b11a483SJeff Roberson } 1667b11a483SJeff Roberson 1677b11a483SJeff Roberson void 1687b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 1697b11a483SJeff Roberson int *domain, int *req) 1707b11a483SJeff Roberson { 1717b11a483SJeff Roberson 1727b11a483SJeff Roberson vm_domainset_iter_domain(di, obj); 1737b11a483SJeff Roberson di->di_flags = *req; 1747b11a483SJeff Roberson *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 1757b11a483SJeff Roberson VM_ALLOC_NOWAIT; 1767b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 1777b11a483SJeff Roberson } 1787b11a483SJeff Roberson 1797b11a483SJeff Roberson int 1807b11a483SJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 1817b11a483SJeff Roberson { 1827b11a483SJeff Roberson 1837b11a483SJeff Roberson /* 1847b11a483SJeff Roberson * If we exhausted all options with NOWAIT and did a WAITFAIL it 1857b11a483SJeff Roberson * is time to return an error to the caller. 1867b11a483SJeff Roberson */ 1877b11a483SJeff Roberson if ((*req & VM_ALLOC_WAITFAIL) != 0) 1887b11a483SJeff Roberson return (ENOMEM); 1897b11a483SJeff Roberson 1907b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 1917b11a483SJeff Roberson if (--di->di_n != 0) { 1927b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 1937b11a483SJeff Roberson return (0); 1947b11a483SJeff Roberson } 1957b11a483SJeff Roberson 1967b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 1977b11a483SJeff Roberson if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 1987b11a483SJeff Roberson return (ENOMEM); 1997b11a483SJeff Roberson 2007b11a483SJeff Roberson /* 2017b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2027b11a483SJeff Roberson * from the beginning with a blocking allocation. 2037b11a483SJeff Roberson */ 2047b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2057b11a483SJeff Roberson *req = di->di_flags; 2067b11a483SJeff Roberson 2077b11a483SJeff Roberson return (0); 2087b11a483SJeff Roberson } 2097b11a483SJeff Roberson 2107b11a483SJeff Roberson 2117b11a483SJeff Roberson void 2127b11a483SJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 2137b11a483SJeff Roberson struct vm_object *obj, int *domain, int *flags) 2147b11a483SJeff Roberson { 2157b11a483SJeff Roberson 2167b11a483SJeff Roberson vm_domainset_iter_domain(di, obj); 2177b11a483SJeff Roberson di->di_flags = *flags; 2187b11a483SJeff Roberson *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 2197b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2207b11a483SJeff Roberson } 2217b11a483SJeff Roberson 2227b11a483SJeff Roberson int 2237b11a483SJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 2247b11a483SJeff Roberson { 2257b11a483SJeff Roberson 2267b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 2277b11a483SJeff Roberson if (--di->di_n != 0) { 2287b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 2297b11a483SJeff Roberson return (0); 2307b11a483SJeff Roberson } 2317b11a483SJeff Roberson 2327b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2337b11a483SJeff Roberson if ((di->di_flags & M_WAITOK) == 0) 2347b11a483SJeff Roberson return (ENOMEM); 2357b11a483SJeff Roberson 2367b11a483SJeff Roberson /* 2377b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2387b11a483SJeff Roberson * from the beginning with a blocking allocation. 2397b11a483SJeff Roberson */ 2407b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2417b11a483SJeff Roberson *flags = di->di_flags; 2427b11a483SJeff Roberson 2437b11a483SJeff Roberson return (0); 2447b11a483SJeff Roberson } 245*b6715dabSJeff Roberson 246*b6715dabSJeff Roberson #else /* !NUMA */ 247*b6715dabSJeff Roberson int 248*b6715dabSJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 249*b6715dabSJeff Roberson { 250*b6715dabSJeff Roberson 251*b6715dabSJeff Roberson return (EJUSTRETURN); 252*b6715dabSJeff Roberson } 253*b6715dabSJeff Roberson 254*b6715dabSJeff Roberson void 255*b6715dabSJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, 256*b6715dabSJeff Roberson struct vm_object *obj, int *domain, int *flags) 257*b6715dabSJeff Roberson { 258*b6715dabSJeff Roberson 259*b6715dabSJeff Roberson *domain = 0; 260*b6715dabSJeff Roberson } 261*b6715dabSJeff Roberson 262*b6715dabSJeff Roberson int 263*b6715dabSJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 264*b6715dabSJeff Roberson { 265*b6715dabSJeff Roberson 266*b6715dabSJeff Roberson return (EJUSTRETURN); 267*b6715dabSJeff Roberson } 268*b6715dabSJeff Roberson 269*b6715dabSJeff Roberson void 270*b6715dabSJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 271*b6715dabSJeff Roberson struct vm_object *obj, int *domain, int *flags) 272*b6715dabSJeff Roberson { 273*b6715dabSJeff Roberson 274*b6715dabSJeff Roberson *domain = 0; 275*b6715dabSJeff Roberson } 276*b6715dabSJeff Roberson 277*b6715dabSJeff Roberson #endif 278