1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitset.h> 38 #include <sys/domainset.h> 39 #include <sys/proc.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/malloc.h> 43 #include <sys/vmmeter.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/vm_domainset.h> 48 #include <vm/vm_object.h> 49 #include <vm/vm_page.h> 50 #include <vm/vm_phys.h> 51 52 #ifdef NUMA 53 /* 54 * Iterators are written such that the first nowait pass has as short a 55 * codepath as possible to eliminate bloat from the allocator. It is 56 * assumed that most allocations are successful. 57 */ 58 59 static int vm_domainset_default_stride = 64; 60 61 /* 62 * Determine which policy is to be used for this allocation. 63 */ 64 static void 65 vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66 vm_pindex_t pindex) 67 { 68 struct domainset *domain; 69 70 /* 71 * object policy takes precedence over thread policy. The policies 72 * are immutable and unsynchronized. Updates can race but pointer 73 * loads are assumed to be atomic. 74 */ 75 if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 76 di->di_domain = domain; 77 di->di_iter = &obj->domain.dr_iterator; 78 } else { 79 di->di_domain = curthread->td_domain.dr_policy; 80 di->di_iter = &curthread->td_domain.dr_iterator; 81 } 82 di->di_policy = di->di_domain->ds_policy; 83 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 84 #if VM_NRESERVLEVEL > 0 85 if (vm_object_reserv(obj)) { 86 /* 87 * Color the pindex so we end up on the correct 88 * reservation boundary. 89 */ 90 pindex += obj->pg_color; 91 pindex >>= VM_LEVEL_0_ORDER; 92 } else 93 #endif 94 pindex /= vm_domainset_default_stride; 95 /* 96 * Offset pindex so the first page of each object does 97 * not end up in domain 0. 98 */ 99 if (obj != NULL) 100 pindex += (((uintptr_t)obj) / sizeof(*obj)); 101 di->di_offset = pindex; 102 } 103 } 104 105 static void 106 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 107 { 108 109 *domain = di->di_domain->ds_order[ 110 ++(*di->di_iter) % di->di_domain->ds_cnt]; 111 } 112 113 static void 114 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 115 { 116 int d; 117 118 do { 119 d = di->di_domain->ds_order[ 120 ++(*di->di_iter) % di->di_domain->ds_cnt]; 121 } while (d == di->di_domain->ds_prefer); 122 *domain = d; 123 } 124 125 static void 126 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 127 { 128 int d; 129 130 d = di->di_offset % di->di_domain->ds_cnt; 131 *di->di_iter = d; 132 *domain = di->di_domain->ds_order[d]; 133 } 134 135 static void 136 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 137 { 138 139 KASSERT(di->di_n > 0, 140 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 141 switch (di->di_policy) { 142 case DOMAINSET_POLICY_FIRSTTOUCH: 143 /* 144 * To prevent impossible allocations we convert an invalid 145 * first-touch to round-robin. 146 */ 147 /* FALLTHROUGH */ 148 case DOMAINSET_POLICY_INTERLEAVE: 149 /* FALLTHROUGH */ 150 case DOMAINSET_POLICY_ROUNDROBIN: 151 vm_domainset_iter_rr(di, domain); 152 break; 153 case DOMAINSET_POLICY_PREFER: 154 vm_domainset_iter_prefer(di, domain); 155 break; 156 default: 157 panic("vm_domainset_iter_first: Unknown policy %d", 158 di->di_policy); 159 } 160 KASSERT(*domain < vm_ndomains, 161 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 162 } 163 164 static void 165 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 166 { 167 168 switch (di->di_policy) { 169 case DOMAINSET_POLICY_FIRSTTOUCH: 170 *domain = PCPU_GET(domain); 171 if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 172 /* 173 * Add an extra iteration because we will visit the 174 * current domain a second time in the rr iterator. 175 */ 176 di->di_n = di->di_domain->ds_cnt + 1; 177 break; 178 } 179 /* 180 * To prevent impossible allocations we convert an invalid 181 * first-touch to round-robin. 182 */ 183 /* FALLTHROUGH */ 184 case DOMAINSET_POLICY_ROUNDROBIN: 185 di->di_n = di->di_domain->ds_cnt; 186 vm_domainset_iter_rr(di, domain); 187 break; 188 case DOMAINSET_POLICY_PREFER: 189 *domain = di->di_domain->ds_prefer; 190 di->di_n = di->di_domain->ds_cnt; 191 break; 192 case DOMAINSET_POLICY_INTERLEAVE: 193 vm_domainset_iter_interleave(di, domain); 194 di->di_n = di->di_domain->ds_cnt; 195 break; 196 default: 197 panic("vm_domainset_iter_first: Unknown policy %d", 198 di->di_policy); 199 } 200 KASSERT(di->di_n > 0, 201 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 202 KASSERT(*domain < vm_ndomains, 203 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 204 } 205 206 void 207 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 208 vm_pindex_t pindex, int *domain, int *req) 209 { 210 211 vm_domainset_iter_init(di, obj, pindex); 212 di->di_flags = *req; 213 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 214 VM_ALLOC_NOWAIT; 215 vm_domainset_iter_first(di, domain); 216 } 217 218 int 219 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 220 { 221 222 /* 223 * If we exhausted all options with NOWAIT and did a WAITFAIL it 224 * is time to return an error to the caller. 225 */ 226 if ((*req & VM_ALLOC_WAITFAIL) != 0) 227 return (ENOMEM); 228 229 /* If there are more domains to visit we run the iterator. */ 230 if (--di->di_n != 0) { 231 vm_domainset_iter_next(di, domain); 232 return (0); 233 } 234 235 /* If we visited all domains and this was a NOWAIT we return error. */ 236 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 237 return (ENOMEM); 238 239 /* 240 * We have visited all domains with non-blocking allocations, try 241 * from the beginning with a blocking allocation. 242 */ 243 vm_domainset_iter_first(di, domain); 244 *req = di->di_flags; 245 246 return (0); 247 } 248 249 250 void 251 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 252 struct vm_object *obj, int *domain, int *flags) 253 { 254 255 vm_domainset_iter_init(di, obj, 0); 256 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 257 di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 258 di->di_flags = *flags; 259 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 260 vm_domainset_iter_first(di, domain); 261 } 262 263 int 264 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 265 { 266 267 /* If there are more domains to visit we run the iterator. */ 268 if (--di->di_n != 0) { 269 vm_domainset_iter_next(di, domain); 270 return (0); 271 } 272 273 /* If we visited all domains and this was a NOWAIT we return error. */ 274 if ((di->di_flags & M_WAITOK) == 0) 275 return (ENOMEM); 276 277 /* 278 * We have visited all domains with non-blocking allocations, try 279 * from the beginning with a blocking allocation. 280 */ 281 vm_domainset_iter_first(di, domain); 282 *flags = di->di_flags; 283 284 return (0); 285 } 286 287 #else /* !NUMA */ 288 int 289 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 290 { 291 292 return (EJUSTRETURN); 293 } 294 295 void 296 vm_domainset_iter_page_init(struct vm_domainset_iter *di, 297 struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 298 { 299 300 *domain = 0; 301 } 302 303 int 304 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 305 { 306 307 return (EJUSTRETURN); 308 } 309 310 void 311 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 312 struct vm_object *obj, int *domain, int *flags) 313 { 314 315 *domain = 0; 316 } 317 318 #endif 319