/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/pctrie.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#ifdef NUMA
/*
 * Iterators are written such that the first nowait pass has as short a
 * codepath as possible to eliminate bloat from the allocator.  It is
 * assumed that most allocations are successful.
 */

static int vm_domainset_default_stride = 64;

/*
 * Determine which policy is to be used for this allocation.
 */
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
    int *iter, struct vm_object *obj, vm_pindex_t pindex)
{

	di->di_domain = ds;
	di->di_iter = iter;
	di->di_policy = ds->ds_policy;
	DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask);
	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
		if (vm_object_reserv(obj)) {
			/*
			 * Color the pindex so we end up on the correct
			 * reservation boundary.
			 */
			pindex += obj->pg_color;
#if VM_NRESERVLEVEL > 1
			pindex >>= VM_LEVEL_1_ORDER;
#endif
			pindex >>= VM_LEVEL_0_ORDER;
		} else
#endif
			pindex /= vm_domainset_default_stride;
		/*
		 * Offset pindex so the first page of each object does
		 * not end up in domain 0.
		 */
		if (obj != NULL)
			pindex += (((uintptr_t)obj) / sizeof(*obj));
		di->di_offset = pindex;
	}
	/* Skip domains below min on the first pass. */
	di->di_minskip = true;
}
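
/*
 * A worked example of the interleave arithmetic above (a sketch,
 * assuming amd64's VM_LEVEL_0_ORDER of 9, i.e. 512-page, 2MB
 * reservations): after coloring, the pindex is shifted right by 9, so
 * every aligned 512-page run of a reservation-backed object selects a
 * single domain, and with ds_cnt == 2 consecutive runs alternate
 * between the two domains.  Objects that cannot use reservations
 * interleave on the 64-page default stride instead.  The per-object
 * offset added above staggers the starting domain from object to
 * object.
 */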

static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{

	*domain = di->di_domain->ds_order[
	    ++(*di->di_iter) % di->di_domain->ds_cnt];
}

static void
vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
{
	int d;

	do {
		d = di->di_domain->ds_order[
		    ++(*di->di_iter) % di->di_domain->ds_cnt];
	} while (d == di->di_domain->ds_prefer);
	*domain = d;
}

static void
vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
{
	int d;

	d = di->di_offset % di->di_domain->ds_cnt;
	*di->di_iter = d;
	*domain = di->di_domain->ds_order[d];
}

static void
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{

	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_next: Invalid n %d", di->di_n));
	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_INTERLEAVE:
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		vm_domainset_iter_prefer(di, domain);
		break;
	default:
		panic("vm_domainset_iter_next: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
}

static void
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{

	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		*domain = PCPU_GET(domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
			/*
			 * Add an extra iteration because we will visit the
			 * current domain a second time in the rr iterator.
			 */
			di->di_n = di->di_domain->ds_cnt + 1;
			break;
		}
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		di->di_n = di->di_domain->ds_cnt;
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		*domain = di->di_domain->ds_prefer;
		di->di_n = di->di_domain->ds_cnt;
		break;
	case DOMAINSET_POLICY_INTERLEAVE:
		vm_domainset_iter_interleave(di, domain);
		di->di_n = di->di_domain->ds_cnt;
		break;
	default:
		panic("vm_domainset_iter_first: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *req, struct pctrie_iter *pages)
{
	struct domainset_ref *dr;

	/*
	 * Object policy takes precedence over thread policy.  The policies
	 * are immutable and unsynchronized.  Updates can race but pointer
	 * loads are assumed to be atomic.
	 */
	if (obj != NULL && obj->domain.dr_policy != NULL)
		dr = &obj->domain;
	else
		dr = &curthread->td_domain;
	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
	di->di_flags = *req;
	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
	    VM_ALLOC_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_page(di, obj, domain, pages);
}
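
/*
 * A minimal sketch of the expected calling pattern, assuming a
 * hypothetical per-domain allocator alloc_page_domain(); real
 * consumers such as vm_page_alloc() follow this shape with their own
 * backends.  The first attempt runs with the VM_ALLOC_NOWAIT request
 * installed by vm_domainset_iter_page_init() above, any sleeping
 * happens inside vm_domainset_iter_page(), and the optional pctrie
 * iterator may be passed as NULL:
 *
 *	struct vm_domainset_iter di;
 *	vm_page_t m;
 *	int domain, req;
 *
 *	req = VM_ALLOC_NORMAL | VM_ALLOC_WAITOK;
 *	vm_domainset_iter_page_init(&di, obj, pindex, &domain, &req, NULL);
 *	do {
 *		m = alloc_page_domain(obj, pindex, domain, req);
 *	} while (m == NULL &&
 *	    vm_domainset_iter_page(&di, obj, &domain, NULL) == 0);
 */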

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain, struct pctrie_iter *pages)
{
	if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
		return (ENOMEM);

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}

	/* If we skipped domains below min, restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT request, fail. */
	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
		return (ENOMEM);

	/* Wait for one of the domains to accumulate some free pages. */
	if (obj != NULL) {
		VM_OBJECT_WUNLOCK(obj);
		if (pages != NULL)
			pctrie_iter_reset(pages);
	}
	vm_wait_doms(&di->di_valid_mask, 0);
	if (obj != NULL)
		VM_OBJECT_WLOCK(obj);
	if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
		return (ENOMEM);

	/* Restart the search. */
	vm_domainset_iter_first(di, domain);

	return (0);
}

static void
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
    int *flags)
{

	di->di_flags = *flags;
	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_policy(di, domain);
}

void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
	if (DOMAINSET_EMPTY(&di->di_valid_mask))
		return (ENOMEM);

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}

	/* If we skipped domains below min, restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT request, fail. */
	if ((di->di_flags & M_WAITOK) == 0)
		return (ENOMEM);

	/* Wait for one of the domains to accumulate some free pages. */
	vm_wait_doms(&di->di_valid_mask, 0);

	/* Restart the search. */
	vm_domainset_iter_first(di, domain);

	return (0);
}
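
/*
 * A minimal sketch of the malloc-flag variant, assuming a hypothetical
 * per-domain backend alloc_domain(); kmem_malloc_domainset() follows
 * this shape.  The init call downgrades *flags to M_NOWAIT so each
 * per-domain attempt is non-blocking, while vm_domainset_iter_policy()
 * performs the sleep itself when the original request was M_WAITOK:
 *
 *	struct vm_domainset_iter di;
 *	void *addr;
 *	int domain;
 *
 *	vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
 *	do {
 *		addr = alloc_domain(domain, size, flags);
 *	} while (addr == NULL &&
 *	    vm_domainset_iter_policy(&di, &domain) == 0);
 */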

void
vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain)
{
	KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask),
	    ("%s: domain %d not present in di_valid_mask for di %p",
	    __func__, domain, di));
	DOMAINSET_CLR(domain, &di->di_valid_mask);
}

#else /* !NUMA */

/*
 * Without NUMA the system has a single memory domain.  The init
 * functions select domain 0 and the iterators return EJUSTRETURN so
 * that callers stop after their first allocation attempt.
 */

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain, struct pctrie_iter *pages)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *flags, struct pctrie_iter *pages)
{

	*domain = 0;
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	*domain = 0;
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	*domain = 0;
}

void
vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused,
    int domain __unused)
{
}

#endif /* NUMA */