1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitset.h> 38 #include <sys/domainset.h> 39 #include <sys/proc.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/malloc.h> 43 #include <sys/vmmeter.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/vm_domainset.h> 48 #include <vm/vm_object.h> 49 #include <vm/vm_page.h> 50 #include <vm/vm_phys.h> 51 52 #ifdef NUMA 53 /* 54 * Iterators are written such that the first nowait pass has as short a 55 * codepath as possible to eliminate bloat from the allocator. It is 56 * assumed that most allocations are successful. 57 */ 58 59 static int vm_domainset_default_stride = 64; 60 61 /* 62 * Determine which policy is to be used for this allocation. 63 */ 64 static void 65 vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66 vm_pindex_t pindex) 67 { 68 struct domainset *domain; 69 70 /* 71 * object policy takes precedence over thread policy. The policies 72 * are immutable and unsynchronized. Updates can race but pointer 73 * loads are assumed to be atomic. 74 */ 75 if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 76 di->di_domain = domain; 77 di->di_iter = &obj->domain.dr_iterator; 78 } else { 79 di->di_domain = curthread->td_domain.dr_policy; 80 di->di_iter = &curthread->td_domain.dr_iterator; 81 } 82 di->di_policy = di->di_domain->ds_policy; 83 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 84 #if VM_NRESERVLEVEL > 0 85 if (vm_object_reserv(obj)) { 86 /* 87 * Color the pindex so we end up on the correct 88 * reservation boundary. 89 */ 90 pindex += obj->pg_color; 91 pindex >>= VM_LEVEL_0_ORDER; 92 } else 93 #endif 94 pindex /= vm_domainset_default_stride; 95 /* 96 * Offset pindex so the first page of each object does 97 * not end up in domain 0. 98 */ 99 if (obj != NULL) 100 pindex += (((uintptr_t)obj) / sizeof(*obj)); 101 di->di_offset = pindex; 102 } 103 /* Skip domains below min on the first pass. */ 104 di->di_minskip = true; 105 } 106 107 static void 108 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 109 { 110 111 *domain = di->di_domain->ds_order[ 112 ++(*di->di_iter) % di->di_domain->ds_cnt]; 113 } 114 115 static void 116 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 117 { 118 int d; 119 120 do { 121 d = di->di_domain->ds_order[ 122 ++(*di->di_iter) % di->di_domain->ds_cnt]; 123 } while (d == di->di_domain->ds_prefer); 124 *domain = d; 125 } 126 127 static void 128 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 129 { 130 int d; 131 132 d = di->di_offset % di->di_domain->ds_cnt; 133 *di->di_iter = d; 134 *domain = di->di_domain->ds_order[d]; 135 } 136 137 static void 138 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 139 { 140 141 KASSERT(di->di_n > 0, 142 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 143 switch (di->di_policy) { 144 case DOMAINSET_POLICY_FIRSTTOUCH: 145 /* 146 * To prevent impossible allocations we convert an invalid 147 * first-touch to round-robin. 148 */ 149 /* FALLTHROUGH */ 150 case DOMAINSET_POLICY_INTERLEAVE: 151 /* FALLTHROUGH */ 152 case DOMAINSET_POLICY_ROUNDROBIN: 153 vm_domainset_iter_rr(di, domain); 154 break; 155 case DOMAINSET_POLICY_PREFER: 156 vm_domainset_iter_prefer(di, domain); 157 break; 158 default: 159 panic("vm_domainset_iter_first: Unknown policy %d", 160 di->di_policy); 161 } 162 KASSERT(*domain < vm_ndomains, 163 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 164 } 165 166 static void 167 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 168 { 169 170 switch (di->di_policy) { 171 case DOMAINSET_POLICY_FIRSTTOUCH: 172 *domain = PCPU_GET(domain); 173 if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 174 /* 175 * Add an extra iteration because we will visit the 176 * current domain a second time in the rr iterator. 177 */ 178 di->di_n = di->di_domain->ds_cnt + 1; 179 break; 180 } 181 /* 182 * To prevent impossible allocations we convert an invalid 183 * first-touch to round-robin. 184 */ 185 /* FALLTHROUGH */ 186 case DOMAINSET_POLICY_ROUNDROBIN: 187 di->di_n = di->di_domain->ds_cnt; 188 vm_domainset_iter_rr(di, domain); 189 break; 190 case DOMAINSET_POLICY_PREFER: 191 *domain = di->di_domain->ds_prefer; 192 di->di_n = di->di_domain->ds_cnt; 193 break; 194 case DOMAINSET_POLICY_INTERLEAVE: 195 vm_domainset_iter_interleave(di, domain); 196 di->di_n = di->di_domain->ds_cnt; 197 break; 198 default: 199 panic("vm_domainset_iter_first: Unknown policy %d", 200 di->di_policy); 201 } 202 KASSERT(di->di_n > 0, 203 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 204 KASSERT(*domain < vm_ndomains, 205 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 206 } 207 208 void 209 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 210 vm_pindex_t pindex, int *domain, int *req) 211 { 212 213 vm_domainset_iter_init(di, obj, pindex); 214 di->di_flags = *req; 215 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 216 VM_ALLOC_NOWAIT; 217 vm_domainset_iter_first(di, domain); 218 if (DOMAINSET_ISSET(*domain, &vm_min_domains)) 219 vm_domainset_iter_page(di, domain, req); 220 } 221 222 int 223 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 224 { 225 226 /* 227 * If we exhausted all options with NOWAIT and did a WAITFAIL it 228 * is time to return an error to the caller. 229 */ 230 if ((*req & VM_ALLOC_WAITFAIL) != 0) 231 return (ENOMEM); 232 233 /* If there are more domains to visit we run the iterator. */ 234 while (--di->di_n != 0) { 235 vm_domainset_iter_next(di, domain); 236 if (!di->di_minskip || 237 !DOMAINSET_ISSET(*domain, &vm_min_domains)) 238 return (0); 239 } 240 if (di->di_minskip) { 241 di->di_minskip = false; 242 vm_domainset_iter_first(di, domain); 243 return (0); 244 } 245 246 /* If we visited all domains and this was a NOWAIT we return error. */ 247 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 248 return (ENOMEM); 249 250 /* 251 * We have visited all domains with non-blocking allocations, try 252 * from the beginning with a blocking allocation. 253 */ 254 vm_domainset_iter_first(di, domain); 255 *req = di->di_flags; 256 257 return (0); 258 } 259 260 261 void 262 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 263 struct vm_object *obj, int *domain, int *flags) 264 { 265 266 vm_domainset_iter_init(di, obj, 0); 267 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 268 di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 269 di->di_flags = *flags; 270 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 271 vm_domainset_iter_first(di, domain); 272 if (DOMAINSET_ISSET(*domain, &vm_min_domains)) 273 vm_domainset_iter_malloc(di, domain, flags); 274 } 275 276 int 277 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 278 { 279 280 /* If there are more domains to visit we run the iterator. */ 281 while (--di->di_n != 0) { 282 vm_domainset_iter_next(di, domain); 283 if (!di->di_minskip || 284 !DOMAINSET_ISSET(*domain, &vm_min_domains)) 285 return (0); 286 } 287 288 /* If we skipped domains below min restart the search. */ 289 if (di->di_minskip) { 290 di->di_minskip = false; 291 vm_domainset_iter_first(di, domain); 292 return (0); 293 } 294 295 /* If we visited all domains and this was a NOWAIT we return error. */ 296 if ((di->di_flags & M_WAITOK) == 0) 297 return (ENOMEM); 298 299 /* 300 * We have visited all domains with non-blocking allocations, try 301 * from the beginning with a blocking allocation. 302 */ 303 vm_domainset_iter_first(di, domain); 304 *flags = di->di_flags; 305 306 return (0); 307 } 308 309 #else /* !NUMA */ 310 int 311 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 312 { 313 314 return (EJUSTRETURN); 315 } 316 317 void 318 vm_domainset_iter_page_init(struct vm_domainset_iter *di, 319 struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 320 { 321 322 *domain = 0; 323 } 324 325 int 326 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 327 { 328 329 return (EJUSTRETURN); 330 } 331 332 void 333 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 334 struct vm_object *obj, int *domain, int *flags) 335 { 336 337 *domain = 0; 338 } 339 340 #endif 341