/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/pctrie.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#ifdef NUMA
/*
 * Iterators are written such that the first nowait pass has as short a
 * codepath as possible to eliminate bloat from the allocator.  It is
 * assumed that most allocations are successful.
 */

static int vm_domainset_default_stride = 64;

static bool vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain);

/*
 * Determine which policy is to be used for this allocation.
 */
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
    int *iter, struct vm_object *obj, vm_pindex_t pindex)
{

        di->di_domain = ds;
        di->di_iter = iter;
        di->di_policy = ds->ds_policy;
        DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask);
        if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
                if (vm_object_reserv(obj)) {
                        /*
                         * Color the pindex so we end up on the correct
                         * reservation boundary.
                         */
                        pindex += obj->pg_color;
#if VM_NRESERVLEVEL > 1
                        pindex >>= VM_LEVEL_1_ORDER;
#endif
                        pindex >>= VM_LEVEL_0_ORDER;
                } else
#endif
                        pindex /= vm_domainset_default_stride;
                /*
                 * Offset pindex so the first page of each object does
                 * not end up in domain 0.
                 */
                if (obj != NULL)
                        pindex += (((uintptr_t)obj) / sizeof(*obj));
                di->di_offset = pindex;
        }
}
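
/*
 * Illustrative sketch of the interleave mapping set up above (comment-only
 * example, not compiled; it assumes VM_NRESERVLEVEL == 1 and
 * VM_LEVEL_0_ORDER == 9, as on amd64, where a level-0 reservation covers
 * 512 pages).  With 'hash' standing for the per-object offset derived from
 * the object pointer:
 *
 *      pindex   0..511  -> ds_order[(0 + hash) % ds_cnt]
 *      pindex 512..1023 -> ds_order[(1 + hash) % ds_cnt]
 *
 * Each reservation-sized run of pages thus lands on a single domain and
 * consecutive runs rotate through the set; objects without reservations
 * interleave every 'vm_domainset_default_stride' (64) pages instead.
 */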

static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{

        /* Grab the next domain in 'ds_order'. */
        *domain = di->di_domain->ds_order[
            (*di->di_iter)++ % di->di_domain->ds_cnt];
}

static void
vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
{
        int d;

        d = di->di_offset % di->di_domain->ds_cnt;
        *di->di_iter = d;
        *domain = di->di_domain->ds_order[d];
}

/*
 * Internal function determining the current phase's first candidate domain.
 *
 * Returns whether there is an eligible domain, which is returned through
 * '*domain'.  '*domain' can be modified even if there is no eligible domain.
 *
 * See the herald comment of vm_domainset_iter_first() below for a
 * description of the phases.
 */
static bool
vm_domainset_iter_phase_first(struct vm_domainset_iter *di, int *domain)
{
        switch (di->di_policy) {
        case DOMAINSET_POLICY_FIRSTTOUCH:
                *domain = PCPU_GET(domain);
                break;
        case DOMAINSET_POLICY_ROUNDROBIN:
                vm_domainset_iter_rr(di, domain);
                break;
        case DOMAINSET_POLICY_PREFER:
                *domain = di->di_domain->ds_prefer;
                break;
        case DOMAINSET_POLICY_INTERLEAVE:
                vm_domainset_iter_interleave(di, domain);
                break;
        default:
                panic("%s: Unknown policy %d", __func__, di->di_policy);
        }
        KASSERT(*domain < vm_ndomains,
            ("%s: Invalid domain %d", __func__, *domain));

        /*
         * Has the policy's start domain already been visited?
         */
        if (!DOMAINSET_ISSET(*domain, &di->di_remain_mask))
                return (vm_domainset_iter_next(di, domain));

        DOMAINSET_CLR(*domain, &di->di_remain_mask);

        /* Does it have enough free pages (phase 1)? */
        if (di->di_minskip && vm_page_count_min_domain(*domain)) {
                /* Mark the domain as eligible for phase 2. */
                DOMAINSET_SET(*domain, &di->di_min_mask);
                return (vm_domainset_iter_next(di, domain));
        }

        return (true);
}

/*
 * Resets an iterator to point to the first candidate domain.
 *
 * Returns whether there is an eligible domain to start with.  '*domain' may
 * be modified even if there is none.
 *
 * vm_domainset_iter_init() must have been called beforehand.
 *
 * This function must be called at least once before calling
 * vm_domainset_iter_next().  Note that functions wrapping
 * vm_domainset_iter_init() usually do that themselves.
 *
 * This function may be called again to reset the iterator to the policy's
 * first candidate domain.  After each reset, the iterator will visit the
 * same domains as in the previous iteration minus those on which
 * vm_domainset_iter_ignore() has been called.  Note that the first candidate
 * domain may change at each reset (at the time of this writing, only under
 * the DOMAINSET_POLICY_ROUNDROBIN policy).
 *
 * Domains whose number of free pages is above 'v_free_min' are always
 * visited first (this is called "phase 1" in comments, "phase 2" being the
 * examination of the remaining domains; no domain is ever visited twice).
 */
static bool
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{
        /* Initialize the mask of domains to visit. */
        DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask);
        /*
         * No candidate domains for phase 2 at start.  This will be filled
         * by phase 1.
         */
        DOMAINSET_ZERO(&di->di_min_mask);
        /* Skip domains below 'v_free_min' during phase 1. */
        di->di_minskip = true;

        return (vm_domainset_iter_phase_first(di, domain));
}

/*
 * Advances the iterator to the next candidate domain.
 *
 * Returns whether there was another domain to visit.  '*domain' may be
 * modified even if there is none.
 *
 * vm_domainset_iter_first() must have been called at least once before using
 * this function (see its herald comment for more details on iterators).
 */
static bool
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{
        /* Loop while domains remain to be visited in the current phase. */
        while (!DOMAINSET_EMPTY(&di->di_remain_mask)) {
                /* Grab the next domain in 'ds_order'. */
                vm_domainset_iter_rr(di, domain);
                KASSERT(*domain < vm_ndomains,
                    ("%s: Invalid domain %d", __func__, *domain));

                if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) {
                        DOMAINSET_CLR(*domain, &di->di_remain_mask);
                        if (!di->di_minskip ||
                            !vm_page_count_min_domain(*domain))
                                return (true);
                        DOMAINSET_SET(*domain, &di->di_min_mask);
                }
        }

        /*
         * If phase 1 (skip low-memory domains) is over, start phase 2
         * (consider low-memory domains).
         */
        if (di->di_minskip) {
                di->di_minskip = false;
                /* Browse domains that were under 'v_free_min'. */
                DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask);
                return (vm_domainset_iter_phase_first(di, domain));
        }

        return (false);
}
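
/*
 * Worked example of the two-phase protocol implemented above (hypothetical
 * numbers): with a round-robin policy over domains {0, 1, 2, 3} where only
 * domain 2 is below 'v_free_min', an iteration started by
 * vm_domainset_iter_first() yields:
 *
 *      phase 1: 0, 1, 3    (2 is skipped and recorded in 'di_min_mask')
 *      phase 2: 2          (low-memory domains are visited last)
 *
 * Each domain is offered exactly once across both phases.
 */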

int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *req)
{
        struct domainset_ref *dr;

        di->di_flags = *req;
        *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
            VM_ALLOC_NOWAIT;

        /*
         * Object policy takes precedence over thread policy.  The policies
         * are immutable and unsynchronized.  Updates can race but pointer
         * loads are assumed to be atomic.
         */
        if (obj != NULL && obj->domain.dr_policy != NULL)
                dr = &obj->domain;
        else
                dr = &curthread->td_domain;

        vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
        /*
         * XXXOC: Shouldn't we just panic on 'false' if VM_ALLOC_WAITOK was
         * passed?
         */
        return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain, struct pctrie_iter *pages)
{
        if (vm_domainset_iter_next(di, domain))
                return (0);

        /*
         * If we visited all domains and this was a NOWAIT request, return
         * an error.
         */
        if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
                return (ENOMEM);

        /* Wait for one of the domains to accumulate some free pages. */
        if (obj != NULL) {
                VM_OBJECT_WUNLOCK(obj);
                if (pages != NULL)
                        pctrie_iter_reset(pages);
        }
        vm_wait_doms(&di->di_valid_mask, 0);
        if (obj != NULL)
                VM_OBJECT_WLOCK(obj);
        if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
                return (ENOMEM);

        /* Restart the search. */
        /* XXXOC: Shouldn't we just panic on 'false'? */
        return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
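
/*
 * Sketch of the intended calling pattern for the page iterator (illustrative
 * only; 'alloc_from_domain' is a hypothetical stand-in for the per-domain
 * allocation attempt that real callers such as vm_page_alloc() perform):
 *
 *      struct vm_domainset_iter di;
 *      vm_page_t m;
 *      int domain, req;
 *
 *      req = VM_ALLOC_NORMAL | VM_ALLOC_WAITOK;
 *      if (vm_domainset_iter_page_init(&di, obj, pindex, &domain, &req) != 0)
 *              return (NULL);
 *      do {
 *              m = alloc_from_domain(obj, pindex, domain, req);
 *      } while (m == NULL &&
 *          vm_domainset_iter_page(&di, obj, &domain, NULL) == 0);
 *
 * vm_domainset_iter_page_init() strips VM_ALLOC_WAITOK/VM_ALLOC_WAITFAIL
 * from 'req' so that each per-domain attempt is NOWAIT; only after all
 * domains have failed does vm_domainset_iter_page() sleep (WAITOK) or give
 * up (WAITFAIL/NOWAIT).  The vm_domainset_iter_policy*() variants below
 * follow the same pattern for malloc(9)-style M_WAITOK/M_NOWAIT consumers.
 */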

static int
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
    int *flags)
{
        di->di_flags = *flags;
        *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
        /* XXXOC: Shouldn't we just panic on 'false' if M_WAITOK was passed? */
        return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}

int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

        vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
        return (_vm_domainset_iter_policy_init(di, domain, flags));
}

int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

        vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
        return (_vm_domainset_iter_policy_init(di, domain, flags));
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
        if (vm_domainset_iter_next(di, domain))
                return (0);

        /*
         * If we visited all domains and this was an M_NOWAIT request,
         * return an error.
         */
        if ((di->di_flags & M_WAITOK) == 0)
                return (ENOMEM);

        /* Wait for one of the domains to accumulate some free pages. */
        vm_wait_doms(&di->di_valid_mask, 0);

        /* Restart the search. */
        /* XXXOC: Shouldn't we just panic on 'false'? */
        return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}

void
vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain)
{
        KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask),
            ("%s: domain %d not present in di_valid_mask for di %p",
            __func__, domain, di));
        DOMAINSET_CLR(domain, &di->di_valid_mask);
}

#else /* !NUMA */

int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain, struct pctrie_iter *pages)
{

        return (EJUSTRETURN);
}

int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *flags)
{
        *domain = 0;
        return (0);
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{

        return (EJUSTRETURN);
}

int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{
        *domain = 0;
        return (0);
}

int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{
        *domain = 0;
        return (0);
}

void
vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused,
    int domain __unused)
{
}

#endif /* NUMA */