17b11a483SJeff Roberson /*- 27b11a483SJeff Roberson * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 37b11a483SJeff Roberson * 47b11a483SJeff Roberson * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 57b11a483SJeff Roberson * All rights reserved. 67b11a483SJeff Roberson * 77b11a483SJeff Roberson * Redistribution and use in source and binary forms, with or without 87b11a483SJeff Roberson * modification, are permitted provided that the following conditions 97b11a483SJeff Roberson * are met: 107b11a483SJeff Roberson * 1. Redistributions of source code must retain the above copyright 117b11a483SJeff Roberson * notice unmodified, this list of conditions, and the following 127b11a483SJeff Roberson * disclaimer. 137b11a483SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 147b11a483SJeff Roberson * notice, this list of conditions and the following disclaimer in the 157b11a483SJeff Roberson * documentation and/or other materials provided with the distribution. 167b11a483SJeff Roberson * 177b11a483SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 187b11a483SJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 197b11a483SJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 207b11a483SJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 217b11a483SJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 227b11a483SJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 237b11a483SJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 247b11a483SJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 257b11a483SJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 267b11a483SJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 277b11a483SJeff Roberson * 287b11a483SJeff Roberson */ 297b11a483SJeff Roberson 307b11a483SJeff Roberson #include <sys/cdefs.h> 317b11a483SJeff Roberson __FBSDID("$FreeBSD$"); 327b11a483SJeff Roberson 337b11a483SJeff Roberson #include "opt_vm.h" 347b11a483SJeff Roberson 357b11a483SJeff Roberson #include <sys/param.h> 367b11a483SJeff Roberson #include <sys/systm.h> 377b11a483SJeff Roberson #include <sys/bitset.h> 387b11a483SJeff Roberson #include <sys/domainset.h> 397b11a483SJeff Roberson #include <sys/proc.h> 407b11a483SJeff Roberson #include <sys/lock.h> 417b11a483SJeff Roberson #include <sys/mutex.h> 427b11a483SJeff Roberson #include <sys/malloc.h> 437b11a483SJeff Roberson #include <sys/vmmeter.h> 447b11a483SJeff Roberson 457b11a483SJeff Roberson #include <vm/vm.h> 467b11a483SJeff Roberson #include <vm/vm_param.h> 477b11a483SJeff Roberson #include <vm/vm_domainset.h> 487b11a483SJeff Roberson #include <vm/vm_object.h> 497b11a483SJeff Roberson #include <vm/vm_page.h> 507b11a483SJeff Roberson #include <vm/vm_phys.h> 517b11a483SJeff Roberson 52b6715dabSJeff Roberson #ifdef NUMA 537b11a483SJeff Roberson /* 547b11a483SJeff Roberson * Iterators are written such that the first nowait pass has as short a 557b11a483SJeff Roberson * codepath as possible to eliminate bloat from the allocator. It is 567b11a483SJeff Roberson * assumed that most allocations are successful. 577b11a483SJeff Roberson */ 587b11a483SJeff Roberson 59e5818a53SJeff Roberson static int vm_domainset_default_stride = 64; 60e5818a53SJeff Roberson 617b11a483SJeff Roberson /* 627b11a483SJeff Roberson * Determine which policy is to be used for this allocation. 637b11a483SJeff Roberson */ 647b11a483SJeff Roberson static void 65e5818a53SJeff Roberson vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66e5818a53SJeff Roberson vm_pindex_t pindex) 677b11a483SJeff Roberson { 687b11a483SJeff Roberson struct domainset *domain; 69*463406acSMark Johnston struct thread *td; 707b11a483SJeff Roberson 717b11a483SJeff Roberson /* 727b11a483SJeff Roberson * object policy takes precedence over thread policy. The policies 737b11a483SJeff Roberson * are immutable and unsynchronized. Updates can race but pointer 747b11a483SJeff Roberson * loads are assumed to be atomic. 757b11a483SJeff Roberson */ 767b11a483SJeff Roberson if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 777b11a483SJeff Roberson di->di_domain = domain; 787b11a483SJeff Roberson di->di_iter = &obj->domain.dr_iterator; 797b11a483SJeff Roberson } else { 80*463406acSMark Johnston td = curthread; 81*463406acSMark Johnston di->di_domain = td->td_domain.dr_policy; 82*463406acSMark Johnston di->di_iter = &td->td_domain.dr_iterator; 837b11a483SJeff Roberson } 84e5818a53SJeff Roberson di->di_policy = di->di_domain->ds_policy; 85e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 8623d123c6SMark Johnston #if VM_NRESERVLEVEL > 0 87e5818a53SJeff Roberson if (vm_object_reserv(obj)) { 88e5818a53SJeff Roberson /* 89e5818a53SJeff Roberson * Color the pindex so we end up on the correct 90e5818a53SJeff Roberson * reservation boundary. 91e5818a53SJeff Roberson */ 92e5818a53SJeff Roberson pindex += obj->pg_color; 93e5818a53SJeff Roberson pindex >>= VM_LEVEL_0_ORDER; 9423d123c6SMark Johnston } else 9573e37d1dSMatt Macy #endif 96e5818a53SJeff Roberson pindex /= vm_domainset_default_stride; 97e5818a53SJeff Roberson /* 98e5818a53SJeff Roberson * Offset pindex so the first page of each object does 99e5818a53SJeff Roberson * not end up in domain 0. 100e5818a53SJeff Roberson */ 101e5818a53SJeff Roberson if (obj != NULL) 102e5818a53SJeff Roberson pindex += (((uintptr_t)obj) / sizeof(*obj)); 103e5818a53SJeff Roberson di->di_offset = pindex; 104e5818a53SJeff Roberson } 105c56c7299SMark Johnston /* Skip domains below min on the first pass. */ 10623984ce5SMark Johnston di->di_minskip = true; 1077b11a483SJeff Roberson } 1087b11a483SJeff Roberson 1097b11a483SJeff Roberson static void 1107b11a483SJeff Roberson vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 1117b11a483SJeff Roberson { 1127b11a483SJeff Roberson 113e5818a53SJeff Roberson *domain = di->di_domain->ds_order[ 114e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 1157b11a483SJeff Roberson } 1167b11a483SJeff Roberson 1177b11a483SJeff Roberson static void 1187b11a483SJeff Roberson vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 1197b11a483SJeff Roberson { 1207b11a483SJeff Roberson int d; 1217b11a483SJeff Roberson 1227b11a483SJeff Roberson do { 123e5818a53SJeff Roberson d = di->di_domain->ds_order[ 124e5818a53SJeff Roberson ++(*di->di_iter) % di->di_domain->ds_cnt]; 125e5818a53SJeff Roberson } while (d == di->di_domain->ds_prefer); 126e5818a53SJeff Roberson *domain = d; 127e5818a53SJeff Roberson } 128e5818a53SJeff Roberson 129e5818a53SJeff Roberson static void 130e5818a53SJeff Roberson vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 131e5818a53SJeff Roberson { 132e5818a53SJeff Roberson int d; 133e5818a53SJeff Roberson 134e5818a53SJeff Roberson d = di->di_offset % di->di_domain->ds_cnt; 135e5818a53SJeff Roberson *di->di_iter = d; 136e5818a53SJeff Roberson *domain = di->di_domain->ds_order[d]; 1377b11a483SJeff Roberson } 1387b11a483SJeff Roberson 1397b11a483SJeff Roberson static void 1407b11a483SJeff Roberson vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 1417b11a483SJeff Roberson { 1427b11a483SJeff Roberson 1437b11a483SJeff Roberson KASSERT(di->di_n > 0, 1447b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 145e5818a53SJeff Roberson switch (di->di_policy) { 1467b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1477b11a483SJeff Roberson /* 1487b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1497b11a483SJeff Roberson * first-touch to round-robin. 1507b11a483SJeff Roberson */ 1517b11a483SJeff Roberson /* FALLTHROUGH */ 152e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 153e5818a53SJeff Roberson /* FALLTHROUGH */ 1547b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1557b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1567b11a483SJeff Roberson break; 1577b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1587b11a483SJeff Roberson vm_domainset_iter_prefer(di, domain); 1597b11a483SJeff Roberson break; 1607b11a483SJeff Roberson default: 1617b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 162e5818a53SJeff Roberson di->di_policy); 1637b11a483SJeff Roberson } 1647b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 1657b11a483SJeff Roberson ("vm_domainset_iter_next: Invalid domain %d", *domain)); 1667b11a483SJeff Roberson } 1677b11a483SJeff Roberson 1687b11a483SJeff Roberson static void 1697b11a483SJeff Roberson vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 1707b11a483SJeff Roberson { 1717b11a483SJeff Roberson 172e5818a53SJeff Roberson switch (di->di_policy) { 1737b11a483SJeff Roberson case DOMAINSET_POLICY_FIRSTTOUCH: 1747b11a483SJeff Roberson *domain = PCPU_GET(domain); 1757b11a483SJeff Roberson if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 176e5818a53SJeff Roberson /* 177e5818a53SJeff Roberson * Add an extra iteration because we will visit the 178e5818a53SJeff Roberson * current domain a second time in the rr iterator. 179e5818a53SJeff Roberson */ 180e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt + 1; 1817b11a483SJeff Roberson break; 1827b11a483SJeff Roberson } 1837b11a483SJeff Roberson /* 1847b11a483SJeff Roberson * To prevent impossible allocations we convert an invalid 1857b11a483SJeff Roberson * first-touch to round-robin. 1867b11a483SJeff Roberson */ 1877b11a483SJeff Roberson /* FALLTHROUGH */ 1887b11a483SJeff Roberson case DOMAINSET_POLICY_ROUNDROBIN: 1897b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1907b11a483SJeff Roberson vm_domainset_iter_rr(di, domain); 1917b11a483SJeff Roberson break; 1927b11a483SJeff Roberson case DOMAINSET_POLICY_PREFER: 1937b11a483SJeff Roberson *domain = di->di_domain->ds_prefer; 1947b11a483SJeff Roberson di->di_n = di->di_domain->ds_cnt; 1957b11a483SJeff Roberson break; 196e5818a53SJeff Roberson case DOMAINSET_POLICY_INTERLEAVE: 197e5818a53SJeff Roberson vm_domainset_iter_interleave(di, domain); 198e5818a53SJeff Roberson di->di_n = di->di_domain->ds_cnt; 199e5818a53SJeff Roberson break; 2007b11a483SJeff Roberson default: 2017b11a483SJeff Roberson panic("vm_domainset_iter_first: Unknown policy %d", 202e5818a53SJeff Roberson di->di_policy); 2037b11a483SJeff Roberson } 2047b11a483SJeff Roberson KASSERT(di->di_n > 0, 2057b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 2067b11a483SJeff Roberson KASSERT(*domain < vm_ndomains, 2077b11a483SJeff Roberson ("vm_domainset_iter_first: Invalid domain %d", *domain)); 2087b11a483SJeff Roberson } 2097b11a483SJeff Roberson 2107b11a483SJeff Roberson void 2117b11a483SJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 212e5818a53SJeff Roberson vm_pindex_t pindex, int *domain, int *req) 2137b11a483SJeff Roberson { 2147b11a483SJeff Roberson 215e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, pindex); 2167b11a483SJeff Roberson di->di_flags = *req; 2177b11a483SJeff Roberson *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 2187b11a483SJeff Roberson VM_ALLOC_NOWAIT; 2197b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 220*463406acSMark Johnston if (vm_page_count_min_domain(*domain)) 22123984ce5SMark Johnston vm_domainset_iter_page(di, domain, req); 2227b11a483SJeff Roberson } 2237b11a483SJeff Roberson 2247b11a483SJeff Roberson int 2257b11a483SJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 2267b11a483SJeff Roberson { 2277b11a483SJeff Roberson 2287b11a483SJeff Roberson /* 2297b11a483SJeff Roberson * If we exhausted all options with NOWAIT and did a WAITFAIL it 2307b11a483SJeff Roberson * is time to return an error to the caller. 2317b11a483SJeff Roberson */ 2327b11a483SJeff Roberson if ((*req & VM_ALLOC_WAITFAIL) != 0) 2337b11a483SJeff Roberson return (ENOMEM); 2347b11a483SJeff Roberson 2357b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 23623984ce5SMark Johnston while (--di->di_n != 0) { 2377b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 238*463406acSMark Johnston if (!di->di_minskip || !vm_page_count_min_domain(*domain)) 23923984ce5SMark Johnston return (0); 24023984ce5SMark Johnston } 24123984ce5SMark Johnston if (di->di_minskip) { 24223984ce5SMark Johnston di->di_minskip = false; 24323984ce5SMark Johnston vm_domainset_iter_first(di, domain); 2447b11a483SJeff Roberson return (0); 2457b11a483SJeff Roberson } 2467b11a483SJeff Roberson 2477b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2487b11a483SJeff Roberson if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 2497b11a483SJeff Roberson return (ENOMEM); 2507b11a483SJeff Roberson 2517b11a483SJeff Roberson /* 2527b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 2537b11a483SJeff Roberson * from the beginning with a blocking allocation. 2547b11a483SJeff Roberson */ 2557b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 2567b11a483SJeff Roberson *req = di->di_flags; 2577b11a483SJeff Roberson 2587b11a483SJeff Roberson return (0); 2597b11a483SJeff Roberson } 2607b11a483SJeff Roberson 2617b11a483SJeff Roberson 2627b11a483SJeff Roberson void 2637b11a483SJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 2647b11a483SJeff Roberson struct vm_object *obj, int *domain, int *flags) 2657b11a483SJeff Roberson { 2667b11a483SJeff Roberson 267e5818a53SJeff Roberson vm_domainset_iter_init(di, obj, 0); 268e5818a53SJeff Roberson if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 269e5818a53SJeff Roberson di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 2707b11a483SJeff Roberson di->di_flags = *flags; 2717b11a483SJeff Roberson *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 2727b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 273*463406acSMark Johnston if (vm_page_count_min_domain(*domain)) 27423984ce5SMark Johnston vm_domainset_iter_malloc(di, domain, flags); 2757b11a483SJeff Roberson } 2767b11a483SJeff Roberson 2777b11a483SJeff Roberson int 2787b11a483SJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 2797b11a483SJeff Roberson { 2807b11a483SJeff Roberson 2817b11a483SJeff Roberson /* If there are more domains to visit we run the iterator. */ 28223984ce5SMark Johnston while (--di->di_n != 0) { 2837b11a483SJeff Roberson vm_domainset_iter_next(di, domain); 284*463406acSMark Johnston if (!di->di_minskip || !vm_page_count_min_domain(*domain)) 28523984ce5SMark Johnston return (0); 28623984ce5SMark Johnston } 28723984ce5SMark Johnston 288c56c7299SMark Johnston /* If we skipped domains below min restart the search. */ 28923984ce5SMark Johnston if (di->di_minskip) { 29023984ce5SMark Johnston di->di_minskip = false; 29123984ce5SMark Johnston vm_domainset_iter_first(di, domain); 2927b11a483SJeff Roberson return (0); 2937b11a483SJeff Roberson } 2947b11a483SJeff Roberson 2957b11a483SJeff Roberson /* If we visited all domains and this was a NOWAIT we return error. */ 2967b11a483SJeff Roberson if ((di->di_flags & M_WAITOK) == 0) 2977b11a483SJeff Roberson return (ENOMEM); 2987b11a483SJeff Roberson 2997b11a483SJeff Roberson /* 3007b11a483SJeff Roberson * We have visited all domains with non-blocking allocations, try 3017b11a483SJeff Roberson * from the beginning with a blocking allocation. 3027b11a483SJeff Roberson */ 3037b11a483SJeff Roberson vm_domainset_iter_first(di, domain); 3047b11a483SJeff Roberson *flags = di->di_flags; 3057b11a483SJeff Roberson 3067b11a483SJeff Roberson return (0); 3077b11a483SJeff Roberson } 308b6715dabSJeff Roberson 309b6715dabSJeff Roberson #else /* !NUMA */ 310b6715dabSJeff Roberson int 311b6715dabSJeff Roberson vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 312b6715dabSJeff Roberson { 313b6715dabSJeff Roberson 314b6715dabSJeff Roberson return (EJUSTRETURN); 315b6715dabSJeff Roberson } 316b6715dabSJeff Roberson 317b6715dabSJeff Roberson void 318b6715dabSJeff Roberson vm_domainset_iter_page_init(struct vm_domainset_iter *di, 319e5818a53SJeff Roberson struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 320b6715dabSJeff Roberson { 321b6715dabSJeff Roberson 322b6715dabSJeff Roberson *domain = 0; 323b6715dabSJeff Roberson } 324b6715dabSJeff Roberson 325b6715dabSJeff Roberson int 326b6715dabSJeff Roberson vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 327b6715dabSJeff Roberson { 328b6715dabSJeff Roberson 329b6715dabSJeff Roberson return (EJUSTRETURN); 330b6715dabSJeff Roberson } 331b6715dabSJeff Roberson 332b6715dabSJeff Roberson void 333b6715dabSJeff Roberson vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 334b6715dabSJeff Roberson struct vm_object *obj, int *domain, int *flags) 335b6715dabSJeff Roberson { 336b6715dabSJeff Roberson 337b6715dabSJeff Roberson *domain = 0; 338b6715dabSJeff Roberson } 339b6715dabSJeff Roberson 340b6715dabSJeff Roberson #endif 341