17b11a483SJeff Roberson /*- 27b11a483SJeff Roberson * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 37b11a483SJeff Roberson * 47b11a483SJeff Roberson * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 57b11a483SJeff Roberson * All rights reserved. 67b11a483SJeff Roberson * 77b11a483SJeff Roberson * Redistribution and use in source and binary forms, with or without 87b11a483SJeff Roberson * modification, are permitted provided that the following conditions 97b11a483SJeff Roberson * are met: 107b11a483SJeff Roberson * 1. Redistributions of source code must retain the above copyright 117b11a483SJeff Roberson * notice unmodified, this list of conditions, and the following 127b11a483SJeff Roberson * disclaimer. 137b11a483SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 147b11a483SJeff Roberson * notice, this list of conditions and the following disclaimer in the 157b11a483SJeff Roberson * documentation and/or other materials provided with the distribution. 167b11a483SJeff Roberson * 177b11a483SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 187b11a483SJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 197b11a483SJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 207b11a483SJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 217b11a483SJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 227b11a483SJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 237b11a483SJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 247b11a483SJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 257b11a483SJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 267b11a483SJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 277b11a483SJeff Roberson * 287b11a483SJeff Roberson */ 297b11a483SJeff Roberson 307b11a483SJeff Roberson #include <sys/cdefs.h> 317b11a483SJeff Roberson __FBSDID("$FreeBSD$"); 327b11a483SJeff Roberson 337b11a483SJeff Roberson #include "opt_vm.h" 347b11a483SJeff Roberson 357b11a483SJeff Roberson #include <sys/param.h> 367b11a483SJeff Roberson #include <sys/systm.h> 377b11a483SJeff Roberson #include <sys/bitset.h> 387b11a483SJeff Roberson #include <sys/domainset.h> 397b11a483SJeff Roberson #include <sys/proc.h> 407b11a483SJeff Roberson #include <sys/lock.h> 417b11a483SJeff Roberson #include <sys/mutex.h> 427b11a483SJeff Roberson #include <sys/malloc.h> 437b11a483SJeff Roberson #include <sys/vmmeter.h> 447b11a483SJeff Roberson 457b11a483SJeff Roberson #include <vm/vm.h> 467b11a483SJeff Roberson #include <vm/vm_param.h> 477b11a483SJeff Roberson #include <vm/vm_domainset.h> 487b11a483SJeff Roberson #include <vm/vm_object.h> 497b11a483SJeff Roberson #include <vm/vm_page.h> 507b11a483SJeff Roberson #include <vm/vm_phys.h> 517b11a483SJeff Roberson 52b6715dabSJeff Roberson #ifdef NUMA 537b11a483SJeff Roberson /* 547b11a483SJeff Roberson * Iterators are written such that the first nowait pass has as short a 557b11a483SJeff Roberson * codepath as possible to eliminate bloat from the allocator. It is 567b11a483SJeff Roberson * assumed that most allocations are successful. 577b11a483SJeff Roberson */ 587b11a483SJeff Roberson 59e5818a53SJeff Roberson static int vm_domainset_default_stride = 64; 60e5818a53SJeff Roberson 617b11a483SJeff Roberson /* 627b11a483SJeff Roberson * Determine which policy is to be used for this allocation. 637b11a483SJeff Roberson */ 647b11a483SJeff Roberson static void 65e5818a53SJeff Roberson vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66e5818a53SJeff Roberson vm_pindex_t pindex) 677b11a483SJeff Roberson { 687b11a483SJeff Roberson struct domainset *domain; 697b11a483SJeff Roberson 707b11a483SJeff Roberson /* 717b11a483SJeff Roberson * object policy takes precedence over thread policy. The policies 727b11a483SJeff Roberson * are immutable and unsynchronized. Updates can race but pointer 737b11a483SJeff Roberson * loads are assumed to be atomic. 747b11a483SJeff Roberson */ 757b11a483SJeff Roberson if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 767b11a483SJeff Roberson di->di_domain = domain; 777b11a483SJeff Roberson di->di_iter = &obj->domain.dr_iterator; 787b11a483SJeff Roberson } else { 797b11a483SJeff Roberson di->di_domain = curthread->td_domain.dr_policy; 807b11a483SJeff Roberson di->di_iter = &curthread->td_domain.dr_iterator; 817b11a483SJeff Roberson } 82e5818a53SJeff Roberson di->di_policy = di->di_domain->ds_policy; 83e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 8423d123c6SMark Johnston #if VM_NRESERVLEVEL > 0 85e5818a53SJeff Roberson if (vm_object_reserv(obj)) { 86e5818a53SJeff Roberson /* 87e5818a53SJeff Roberson * Color the pindex so we end up on the correct 88e5818a53SJeff Roberson * reservation boundary. 89e5818a53SJeff Roberson */ 90e5818a53SJeff Roberson pindex += obj->pg_color; 91e5818a53SJeff Roberson pindex >>= VM_LEVEL_0_ORDER; 9223d123c6SMark Johnston } else 9373e37d1dSMatt Macy #endif 94e5818a53SJeff Roberson pindex /= vm_domainset_default_stride; 95e5818a53SJeff Roberson /* 96e5818a53SJeff Roberson * Offset pindex so the first page of each object does 97e5818a53SJeff Roberson * not end up in domain 0. 98e5818a53SJeff Roberson */ 99e5818a53SJeff Roberson if (obj != NULL) 100e5818a53SJeff Roberson pindex += (((uintptr_t)obj) / sizeof(*obj)); 101e5818a53SJeff Roberson di->di_offset = pindex; 102e5818a53SJeff Roberson } 103*23984ce5SMark Johnston /* Skip zones below min on the first pass. */ 104*23984ce5SMark Johnston di->di_minskip = true; 1057b11a483SJeff Roberson } 1067b11a483SJeff Roberson 1077b11a483SJeff Roberson static void 1087b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 1097b11a483SJeff Roberson { 1107b11a483SJeff Roberson 111e5818a53SJeff Roberson *domain = di->di_domain->ds_order[ 112e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 1137b11a483SJeff Roberson } 1147b11a483SJeff Roberson 1157b11a483SJeff Roberson static void 1167b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 1177b11a483SJeff Roberson { 1187b11a483SJeff Roberson int d; 1197b11a483SJeff Roberson 1207b11a483SJeff Roberson do { 121e5818a53SJeff Roberson d = di->di_domain->ds_order[ 122e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 123e5818a53SJeff Roberson } while (d == di->di_domain->ds_prefer); 124e5818a53SJeff Roberson *domain = d; 125e5818a53SJeff Roberson } 126e5818a53SJeff Roberson 127e5818a53SJeff Roberson static void 128e5818a53SJeff Roberson vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 129e5818a53SJeff Roberson { 130e5818a53SJeff Roberson int d; 131e5818a53SJeff Roberson 132e5818a53SJeff Roberson d = di->di_offset % di->di_domain->ds_cnt; 133e5818a53SJeff Roberson *di->di_iter = d; 134e5818a53SJeff Roberson *domain = di->di_domain->ds_order[d]; 1357b11a483SJeff Roberson } 1367b11a483SJeff Roberson 1377b11a483SJeff Roberson static void 1387b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 1397b11a483SJeff Roberson { 1407b11a483SJeff Roberson 1417b11a483SJeff Roberson KASSERT(di->di_n > 0, 1427b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 143e5818a53SJeff Roberson switch (di->di_policy) { 1447b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1457b11a483SJeff Roberson /* 1467b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1477b11a483SJeff Roberson * first-touch to round-robin. 1487b11a483SJeff Roberson */ 1497b11a483SJeff Roberson /* FALLTHROUGH */ 150e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 151e5818a53SJeff Roberson /* FALLTHROUGH */ 1527b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1537b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1547b11a483SJeff Roberson break; 1557b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1567b11a483SJeff Roberson vm_domainset_iter_prefer(di, domain); 1577b11a483SJeff Roberson break; 1587b11a483SJeff Roberson default: 1597b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 160e5818a53SJeff Roberson di->di_policy); 1617b11a483SJeff Roberson } 1627b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 1637b11a483SJeff Roberson ("vm_domainset_iter_next: Invalid domain %d", *domain)); 1647b11a483SJeff Roberson } 1657b11a483SJeff Roberson 1667b11a483SJeff Roberson static void 1677b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 1687b11a483SJeff Roberson { 1697b11a483SJeff Roberson 170e5818a53SJeff Roberson switch (di->di_policy) { 1717b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1727b11a483SJeff Roberson *domain = PCPU_GET(domain); 1737b11a483SJeff Roberson if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 174e5818a53SJeff Roberson /* 175e5818a53SJeff Roberson * Add an extra iteration because we will visit the 176e5818a53SJeff Roberson * current domain a second time in the rr iterator. 177e5818a53SJeff Roberson */ 178e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt + 1; 1797b11a483SJeff Roberson break; 1807b11a483SJeff Roberson } 1817b11a483SJeff Roberson /* 1827b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1837b11a483SJeff Roberson * first-touch to round-robin. 1847b11a483SJeff Roberson */ 1857b11a483SJeff Roberson /* FALLTHROUGH */ 1867b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1877b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1887b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1897b11a483SJeff Roberson break; 1907b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1917b11a483SJeff Roberson *domain = di->di_domain->ds_prefer; 1927b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1937b11a483SJeff Roberson break; 194e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 195e5818a53SJeff Roberson vm_domainset_iter_interleave(di, domain); 196e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt; 197e5818a53SJeff Roberson break; 1987b11a483SJeff Roberson default: 1997b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 200e5818a53SJeff Roberson di->di_policy); 2017b11a483SJeff Roberson } 2027b11a483SJeff Roberson KASSERT(di->di_n > 0, 2037b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 2047b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 2057b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid domain %d", *domain)); 2067b11a483SJeff Roberson } 2077b11a483SJeff Roberson 2087b11a483SJeff Roberson void 2097b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 210e5818a53SJeff Roberson vm_pindex_t pindex, int *domain, int *req) 2117b11a483SJeff Roberson { 2127b11a483SJeff Roberson 213e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, pindex); 2147b11a483SJeff Roberson di->di_flags = *req; 2157b11a483SJeff Roberson *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 2167b11a483SJeff Roberson VM_ALLOC_NOWAIT; 2177b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 218*23984ce5SMark Johnston if (DOMAINSET_ISSET(*domain, &vm_min_domains)) 219*23984ce5SMark Johnston vm_domainset_iter_page(di, domain, req); 2207b11a483SJeff Roberson } 2217b11a483SJeff Roberson 2227b11a483SJeff Roberson int 2237b11a483SJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 2247b11a483SJeff Roberson { 2257b11a483SJeff Roberson 2267b11a483SJeff Roberson /* 2277b11a483SJeff Roberson * If we exhausted all options with NOWAIT and did a WAITFAIL it 2287b11a483SJeff Roberson * is time to return an error to the caller. 2297b11a483SJeff Roberson */ 2307b11a483SJeff Roberson if ((*req & VM_ALLOC_WAITFAIL) != 0) 2317b11a483SJeff Roberson return (ENOMEM); 2327b11a483SJeff Roberson 2337b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 234*23984ce5SMark Johnston while (--di->di_n != 0) { 2357b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 236*23984ce5SMark Johnston if (!di->di_minskip || 237*23984ce5SMark Johnston !DOMAINSET_ISSET(*domain, &vm_min_domains)) 238*23984ce5SMark Johnston return (0); 239*23984ce5SMark Johnston } 240*23984ce5SMark Johnston if (di->di_minskip) { 241*23984ce5SMark Johnston di->di_minskip = false; 242*23984ce5SMark Johnston vm_domainset_iter_first(di, domain); 2437b11a483SJeff Roberson return (0); 2447b11a483SJeff Roberson } 2457b11a483SJeff Roberson 2467b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2477b11a483SJeff Roberson if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 2487b11a483SJeff Roberson return (ENOMEM); 2497b11a483SJeff Roberson 2507b11a483SJeff Roberson /* 2517b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2527b11a483SJeff Roberson * from the beginning with a blocking allocation. 2537b11a483SJeff Roberson */ 2547b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2557b11a483SJeff Roberson *req = di->di_flags; 2567b11a483SJeff Roberson 2577b11a483SJeff Roberson return (0); 2587b11a483SJeff Roberson } 2597b11a483SJeff Roberson 2607b11a483SJeff Roberson 2617b11a483SJeff Roberson void 2627b11a483SJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 2637b11a483SJeff Roberson struct vm_object *obj, int *domain, int *flags) 2647b11a483SJeff Roberson { 2657b11a483SJeff Roberson 266e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, 0); 267e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 268e5818a53SJeff Roberson di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 2697b11a483SJeff Roberson di->di_flags = *flags; 2707b11a483SJeff Roberson *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 2717b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 272*23984ce5SMark Johnston if (DOMAINSET_ISSET(*domain, &vm_min_domains)) 273*23984ce5SMark Johnston vm_domainset_iter_malloc(di, domain, flags); 2747b11a483SJeff Roberson } 2757b11a483SJeff Roberson 2767b11a483SJeff Roberson int 2777b11a483SJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 2787b11a483SJeff Roberson { 2797b11a483SJeff Roberson 2807b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 281*23984ce5SMark Johnston while (--di->di_n != 0) { 2827b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 283*23984ce5SMark Johnston if (!di->di_minskip || 284*23984ce5SMark Johnston !DOMAINSET_ISSET(*domain, &vm_min_domains)) 285*23984ce5SMark Johnston return (0); 286*23984ce5SMark Johnston } 287*23984ce5SMark Johnston 288*23984ce5SMark Johnston /* If we skipped zones below min start the search from the beginning. */ 289*23984ce5SMark Johnston if (di->di_minskip) { 290*23984ce5SMark Johnston di->di_minskip = false; 291*23984ce5SMark Johnston vm_domainset_iter_first(di, domain); 2927b11a483SJeff Roberson return (0); 2937b11a483SJeff Roberson } 2947b11a483SJeff Roberson 2957b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2967b11a483SJeff Roberson if ((di->di_flags & M_WAITOK) == 0) 2977b11a483SJeff Roberson return (ENOMEM); 2987b11a483SJeff Roberson 2997b11a483SJeff Roberson /* 3007b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 3017b11a483SJeff Roberson * from the beginning with a blocking allocation. 3027b11a483SJeff Roberson */ 3037b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 3047b11a483SJeff Roberson *flags = di->di_flags; 3057b11a483SJeff Roberson 3067b11a483SJeff Roberson return (0); 3077b11a483SJeff Roberson } 308b6715dabSJeff Roberson 309b6715dabSJeff Roberson #else /* !NUMA */ 310b6715dabSJeff Roberson int 311b6715dabSJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 312b6715dabSJeff Roberson { 313b6715dabSJeff Roberson 314b6715dabSJeff Roberson return (EJUSTRETURN); 315b6715dabSJeff Roberson } 316b6715dabSJeff Roberson 317b6715dabSJeff Roberson void 318b6715dabSJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, 319e5818a53SJeff Roberson struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 320b6715dabSJeff Roberson { 321b6715dabSJeff Roberson 322b6715dabSJeff Roberson *domain = 0; 323b6715dabSJeff Roberson } 324b6715dabSJeff Roberson 325b6715dabSJeff Roberson int 326b6715dabSJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 327b6715dabSJeff Roberson { 328b6715dabSJeff Roberson 329b6715dabSJeff Roberson return (EJUSTRETURN); 330b6715dabSJeff Roberson } 331b6715dabSJeff Roberson 332b6715dabSJeff Roberson void 333b6715dabSJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 334b6715dabSJeff Roberson struct vm_object *obj, int *domain, int *flags) 335b6715dabSJeff Roberson { 336b6715dabSJeff Roberson 337b6715dabSJeff Roberson *domain = 0; 338b6715dabSJeff Roberson } 339b6715dabSJeff Roberson 340b6715dabSJeff Roberson #endif 341