/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * 28 */ 29 30 #include <sys/cdefs.h> 31 #include "opt_vm.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bitset.h> 36 #include <sys/domainset.h> 37 #include <sys/proc.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/malloc.h> 41 #include <sys/rwlock.h> 42 #include <sys/vmmeter.h> 43 44 #include <vm/vm.h> 45 #include <vm/vm_param.h> 46 #include <vm/vm_domainset.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/vm_phys.h> 50 51 #ifdef NUMA 52 /* 53 * Iterators are written such that the first nowait pass has as short a 54 * codepath as possible to eliminate bloat from the allocator. It is 55 * assumed that most allocations are successful. 56 */ 57 58 static int vm_domainset_default_stride = 64; 59 60 /* 61 * Determine which policy is to be used for this allocation. 62 */ 63 static void 64 vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds, 65 int *iter, struct vm_object *obj, vm_pindex_t pindex) 66 { 67 68 di->di_domain = ds; 69 di->di_iter = iter; 70 di->di_policy = ds->ds_policy; 71 DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask); 72 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 73 #if VM_NRESERVLEVEL > 0 74 if (vm_object_reserv(obj)) { 75 /* 76 * Color the pindex so we end up on the correct 77 * reservation boundary. 78 */ 79 pindex += obj->pg_color; 80 pindex >>= VM_LEVEL_0_ORDER; 81 } else 82 #endif 83 pindex /= vm_domainset_default_stride; 84 /* 85 * Offset pindex so the first page of each object does 86 * not end up in domain 0. 87 */ 88 if (obj != NULL) 89 pindex += (((uintptr_t)obj) / sizeof(*obj)); 90 di->di_offset = pindex; 91 } 92 /* Skip domains below min on the first pass. 
*/ 93 di->di_minskip = true; 94 } 95 96 static void 97 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 98 { 99 100 *domain = di->di_domain->ds_order[ 101 ++(*di->di_iter) % di->di_domain->ds_cnt]; 102 } 103 104 static void 105 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 106 { 107 int d; 108 109 do { 110 d = di->di_domain->ds_order[ 111 ++(*di->di_iter) % di->di_domain->ds_cnt]; 112 } while (d == di->di_domain->ds_prefer); 113 *domain = d; 114 } 115 116 static void 117 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 118 { 119 int d; 120 121 d = di->di_offset % di->di_domain->ds_cnt; 122 *di->di_iter = d; 123 *domain = di->di_domain->ds_order[d]; 124 } 125 126 static void 127 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 128 { 129 130 KASSERT(di->di_n > 0, 131 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 132 switch (di->di_policy) { 133 case DOMAINSET_POLICY_FIRSTTOUCH: 134 /* 135 * To prevent impossible allocations we convert an invalid 136 * first-touch to round-robin. 137 */ 138 /* FALLTHROUGH */ 139 case DOMAINSET_POLICY_INTERLEAVE: 140 /* FALLTHROUGH */ 141 case DOMAINSET_POLICY_ROUNDROBIN: 142 vm_domainset_iter_rr(di, domain); 143 break; 144 case DOMAINSET_POLICY_PREFER: 145 vm_domainset_iter_prefer(di, domain); 146 break; 147 default: 148 panic("vm_domainset_iter_first: Unknown policy %d", 149 di->di_policy); 150 } 151 KASSERT(*domain < vm_ndomains, 152 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 153 } 154 155 static void 156 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 157 { 158 159 switch (di->di_policy) { 160 case DOMAINSET_POLICY_FIRSTTOUCH: 161 *domain = PCPU_GET(domain); 162 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) { 163 /* 164 * Add an extra iteration because we will visit the 165 * current domain a second time in the rr iterator. 
166 */ 167 di->di_n = di->di_domain->ds_cnt + 1; 168 break; 169 } 170 /* 171 * To prevent impossible allocations we convert an invalid 172 * first-touch to round-robin. 173 */ 174 /* FALLTHROUGH */ 175 case DOMAINSET_POLICY_ROUNDROBIN: 176 di->di_n = di->di_domain->ds_cnt; 177 vm_domainset_iter_rr(di, domain); 178 break; 179 case DOMAINSET_POLICY_PREFER: 180 *domain = di->di_domain->ds_prefer; 181 di->di_n = di->di_domain->ds_cnt; 182 break; 183 case DOMAINSET_POLICY_INTERLEAVE: 184 vm_domainset_iter_interleave(di, domain); 185 di->di_n = di->di_domain->ds_cnt; 186 break; 187 default: 188 panic("vm_domainset_iter_first: Unknown policy %d", 189 di->di_policy); 190 } 191 KASSERT(di->di_n > 0, 192 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 193 KASSERT(*domain < vm_ndomains, 194 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 195 } 196 197 void 198 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 199 vm_pindex_t pindex, int *domain, int *req) 200 { 201 struct domainset_ref *dr; 202 203 /* 204 * Object policy takes precedence over thread policy. The policies 205 * are immutable and unsynchronized. Updates can race but pointer 206 * loads are assumed to be atomic. 207 */ 208 if (obj != NULL && obj->domain.dr_policy != NULL) 209 dr = &obj->domain; 210 else 211 dr = &curthread->td_domain; 212 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); 213 di->di_flags = *req; 214 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 215 VM_ALLOC_NOWAIT; 216 vm_domainset_iter_first(di, domain); 217 if (vm_page_count_min_domain(*domain)) 218 vm_domainset_iter_page(di, obj, domain); 219 } 220 221 int 222 vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 223 int *domain) 224 { 225 if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask))) 226 return (ENOMEM); 227 228 /* If there are more domains to visit we run the iterator. 
*/ 229 while (--di->di_n != 0) { 230 vm_domainset_iter_next(di, domain); 231 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && 232 (!di->di_minskip || !vm_page_count_min_domain(*domain))) 233 return (0); 234 } 235 236 /* If we skipped domains below min restart the search. */ 237 if (di->di_minskip) { 238 di->di_minskip = false; 239 vm_domainset_iter_first(di, domain); 240 return (0); 241 } 242 243 /* If we visited all domains and this was a NOWAIT we return error. */ 244 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 245 return (ENOMEM); 246 247 /* Wait for one of the domains to accumulate some free pages. */ 248 if (obj != NULL) 249 VM_OBJECT_WUNLOCK(obj); 250 vm_wait_doms(&di->di_valid_mask, 0); 251 if (obj != NULL) 252 VM_OBJECT_WLOCK(obj); 253 if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0) 254 return (ENOMEM); 255 256 /* Restart the search. */ 257 vm_domainset_iter_first(di, domain); 258 259 return (0); 260 } 261 262 static void 263 _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, 264 int *flags) 265 { 266 267 di->di_flags = *flags; 268 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 269 vm_domainset_iter_first(di, domain); 270 if (vm_page_count_min_domain(*domain)) 271 vm_domainset_iter_policy(di, domain); 272 } 273 274 void 275 vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 276 struct domainset *ds, int *domain, int *flags) 277 { 278 279 vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); 280 _vm_domainset_iter_policy_init(di, domain, flags); 281 } 282 283 void 284 vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 285 struct domainset_ref *dr, int *domain, int *flags) 286 { 287 288 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); 289 _vm_domainset_iter_policy_init(di, domain, flags); 290 } 291 292 int 293 vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 294 { 295 if (DOMAINSET_EMPTY(&di->di_valid_mask)) 296 return (ENOMEM); 297 298 /* 
If there are more domains to visit we run the iterator. */ 299 while (--di->di_n != 0) { 300 vm_domainset_iter_next(di, domain); 301 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && 302 (!di->di_minskip || !vm_page_count_min_domain(*domain))) 303 return (0); 304 } 305 306 /* If we skipped domains below min restart the search. */ 307 if (di->di_minskip) { 308 di->di_minskip = false; 309 vm_domainset_iter_first(di, domain); 310 return (0); 311 } 312 313 /* If we visited all domains and this was a NOWAIT we return error. */ 314 if ((di->di_flags & M_WAITOK) == 0) 315 return (ENOMEM); 316 317 /* Wait for one of the domains to accumulate some free pages. */ 318 vm_wait_doms(&di->di_valid_mask, 0); 319 320 /* Restart the search. */ 321 vm_domainset_iter_first(di, domain); 322 323 return (0); 324 } 325 326 void 327 vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain) 328 { 329 KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask), 330 ("%s: domain %d not present in di_valid_mask for di %p", 331 __func__, domain, di)); 332 DOMAINSET_CLR(domain, &di->di_valid_mask); 333 } 334 335 #else /* !NUMA */ 336 337 int 338 vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 339 int *domain) 340 { 341 342 return (EJUSTRETURN); 343 } 344 345 void 346 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 347 vm_pindex_t pindex, int *domain, int *flags) 348 { 349 350 *domain = 0; 351 } 352 353 int 354 vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 355 { 356 357 return (EJUSTRETURN); 358 } 359 360 void 361 vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 362 struct domainset *ds, int *domain, int *flags) 363 { 364 365 *domain = 0; 366 } 367 368 void 369 vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 370 struct domainset_ref *dr, int *domain, int *flags) 371 { 372 373 *domain = 0; 374 } 375 376 void 377 vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused, 378 int 
domain __unused) 379 { 380 } 381 382 #endif /* NUMA */ 383