/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#ifdef NUMA
/*
 * Iterators are written such that the first nowait pass has as short a
 * codepath as possible to eliminate bloat from the allocator.  It is
 * assumed that most allocations are successful.
 */

static int vm_domainset_default_stride = 64;

/*
 * Determine which policy is to be used for this allocation.
 */
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
    int *iter, struct vm_object *obj, vm_pindex_t pindex)
{

	di->di_domain = ds;
	di->di_iter = iter;
	di->di_policy = ds->ds_policy;
	DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask);
	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
		if (vm_object_reserv(obj)) {
			/*
			 * Color the pindex so we end up on the correct
			 * reservation boundary.
			 */
			pindex += obj->pg_color;
#if VM_NRESERVLEVEL > 1
			pindex >>= VM_LEVEL_1_ORDER;
#endif
			pindex >>= VM_LEVEL_0_ORDER;
		} else
#endif
			pindex /= vm_domainset_default_stride;
		/*
		 * Offset pindex so the first page of each object does
		 * not end up in domain 0.
		 */
		if (obj != NULL)
			pindex += (((uintptr_t)obj) / sizeof(*obj));
		di->di_offset = pindex;
	}
	/* Skip domains below min on the first pass. */
	di->di_minskip = true;
}
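/*
 * For illustration (the numbers here are assumed, not taken from any
 * particular machine): with the interleave policy, two domains in
 * ds_order, the default stride of 64, and ignoring the reservation and
 * object-pointer adjustments above, pindexes 0-63 select ds_order[0],
 * 64-127 select ds_order[1], 128-191 select ds_order[0] again, and so
 * on, since the first domain visited is ds_order[(pindex / 64) % ds_cnt].
 */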
static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{

	*domain = di->di_domain->ds_order[
	    ++(*di->di_iter) % di->di_domain->ds_cnt];
}

static void
vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
{
	int d;

	do {
		d = di->di_domain->ds_order[
		    ++(*di->di_iter) % di->di_domain->ds_cnt];
	} while (d == di->di_domain->ds_prefer);
	*domain = d;
}

static void
vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
{
	int d;

	d = di->di_offset % di->di_domain->ds_cnt;
	*di->di_iter = d;
	*domain = di->di_domain->ds_order[d];
}

static void
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{

	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_next: Invalid n %d", di->di_n));
	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_INTERLEAVE:
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		vm_domainset_iter_prefer(di, domain);
		break;
	default:
		panic("vm_domainset_iter_next: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
}

static void
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{

	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		*domain = PCPU_GET(domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
			/*
			 * Add an extra iteration because we will visit the
			 * current domain a second time in the rr iterator.
			 */
			di->di_n = di->di_domain->ds_cnt + 1;
			break;
		}
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		di->di_n = di->di_domain->ds_cnt;
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		*domain = di->di_domain->ds_prefer;
		di->di_n = di->di_domain->ds_cnt;
		break;
	case DOMAINSET_POLICY_INTERLEAVE:
		vm_domainset_iter_interleave(di, domain);
		di->di_n = di->di_domain->ds_cnt;
		break;
	default:
		panic("vm_domainset_iter_first: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *req)
{
	struct domainset_ref *dr;

	/*
	 * Object policy takes precedence over thread policy.  The policies
	 * are immutable and unsynchronized.  Updates can race but pointer
	 * loads are assumed to be atomic.
	 */
	if (obj != NULL && obj->domain.dr_policy != NULL)
		dr = &obj->domain;
	else
		dr = &curthread->td_domain;
	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
	di->di_flags = *req;
	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
	    VM_ALLOC_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_page(di, obj, domain);
}

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain)
{
	if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
		return (ENOMEM);

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}

	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
		return (ENOMEM);

	/* Wait for one of the domains to accumulate some free pages. */
	if (obj != NULL)
		VM_OBJECT_WUNLOCK(obj);
	vm_wait_doms(&di->di_valid_mask, 0);
	if (obj != NULL)
		VM_OBJECT_WLOCK(obj);
	if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
		return (ENOMEM);

	/* Restart the search. */
	vm_domainset_iter_first(di, domain);

	return (0);
}
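/*
 * A typical consumer drives the page iterator as in the sketch below,
 * modeled on vm_page_alloc_after() in vm_page.c; the exact allocator
 * call and the mpred argument are assumptions for the example, not
 * requirements of the iterator:
 *
 *	struct vm_domainset_iter di;
 *	vm_page_t m;
 *	int domain;
 *
 *	vm_domainset_iter_page_init(&di, object, pindex, &domain, &req);
 *	do {
 *		m = vm_page_alloc_domain_after(object, pindex, domain,
 *		    req, mpred);
 *		if (m != NULL)
 *			break;
 *	} while (vm_domainset_iter_page(&di, object, &domain) == 0);
 *
 * The init function strips VM_ALLOC_WAITOK/VM_ALLOC_WAITFAIL from *req
 * so each per-domain attempt is nonblocking; any sleeping happens inside
 * vm_domainset_iter_page() only after every eligible domain has failed.
 */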
static void
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
    int *flags)
{

	di->di_flags = *flags;
	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_policy(di, domain);
}

void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
	if (DOMAINSET_EMPTY(&di->di_valid_mask))
		return (ENOMEM);

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}

	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & M_WAITOK) == 0)
		return (ENOMEM);

	/* Wait for one of the domains to accumulate some free pages. */
	vm_wait_doms(&di->di_valid_mask, 0);

	/* Restart the search. */
	vm_domainset_iter_first(di, domain);

	return (0);
}
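/*
 * Kernel-memory consumers follow the same pattern using malloc(9) flags;
 * the backend name alloc_domain() below is a stand-in for whatever
 * per-domain allocation routine the caller actually uses:
 *
 *	struct vm_domainset_iter di;
 *	void *va;
 *	int domain;
 *
 *	vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
 *	do {
 *		va = alloc_domain(size, domain, flags);
 *	} while (va == NULL &&
 *	    vm_domainset_iter_policy(&di, &domain) == 0);
 *
 * As with the page variant, M_WAITOK is masked out of *flags for the
 * individual attempts and is honored by vm_wait_doms() only once all
 * domains in di_valid_mask have been tried.
 */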
void
vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain)
{
	KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask),
	    ("%s: domain %d not present in di_valid_mask for di %p",
	    __func__, domain, di));
	DOMAINSET_CLR(domain, &di->di_valid_mask);
}

#else /* !NUMA */

/*
 * Without NUMA there is only domain 0; the stubs below hand it back
 * immediately and report the iterator as exhausted on the first retry.
 */

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *flags)
{

	*domain = 0;
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	*domain = 0;
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	*domain = 0;
}

void
vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused,
    int domain __unused)
{
}

#endif /* NUMA */