1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 #include "opt_vm.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bitset.h> 36 #include <sys/domainset.h> 37 #include <sys/proc.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/malloc.h> 41 #include <sys/rwlock.h> 42 #include <sys/vmmeter.h> 43 44 #include <vm/vm.h> 45 #include <vm/vm_param.h> 46 #include <vm/vm_domainset.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/vm_phys.h> 50 51 #ifdef NUMA 52 /* 53 * Iterators are written such that the first nowait pass has as short a 54 * codepath as possible to eliminate bloat from the allocator. It is 55 * assumed that most allocations are successful. 56 */ 57 58 static int vm_domainset_default_stride = 64; 59 60 /* 61 * Determine which policy is to be used for this allocation. 62 */ 63 static void 64 vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds, 65 int *iter, struct vm_object *obj, vm_pindex_t pindex) 66 { 67 68 di->di_domain = ds; 69 di->di_iter = iter; 70 di->di_policy = ds->ds_policy; 71 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 72 #if VM_NRESERVLEVEL > 0 73 if (vm_object_reserv(obj)) { 74 /* 75 * Color the pindex so we end up on the correct 76 * reservation boundary. 77 */ 78 pindex += obj->pg_color; 79 pindex >>= VM_LEVEL_0_ORDER; 80 } else 81 #endif 82 pindex /= vm_domainset_default_stride; 83 /* 84 * Offset pindex so the first page of each object does 85 * not end up in domain 0. 86 */ 87 if (obj != NULL) 88 pindex += (((uintptr_t)obj) / sizeof(*obj)); 89 di->di_offset = pindex; 90 } 91 /* Skip domains below min on the first pass. */ 92 di->di_minskip = true; 93 } 94 95 static void 96 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 97 { 98 99 *domain = di->di_domain->ds_order[ 100 ++(*di->di_iter) % di->di_domain->ds_cnt]; 101 } 102 103 static void 104 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 105 { 106 int d; 107 108 do { 109 d = di->di_domain->ds_order[ 110 ++(*di->di_iter) % di->di_domain->ds_cnt]; 111 } while (d == di->di_domain->ds_prefer); 112 *domain = d; 113 } 114 115 static void 116 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 117 { 118 int d; 119 120 d = di->di_offset % di->di_domain->ds_cnt; 121 *di->di_iter = d; 122 *domain = di->di_domain->ds_order[d]; 123 } 124 125 static void 126 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 127 { 128 129 KASSERT(di->di_n > 0, 130 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 131 switch (di->di_policy) { 132 case DOMAINSET_POLICY_FIRSTTOUCH: 133 /* 134 * To prevent impossible allocations we convert an invalid 135 * first-touch to round-robin. 136 */ 137 /* FALLTHROUGH */ 138 case DOMAINSET_POLICY_INTERLEAVE: 139 /* FALLTHROUGH */ 140 case DOMAINSET_POLICY_ROUNDROBIN: 141 vm_domainset_iter_rr(di, domain); 142 break; 143 case DOMAINSET_POLICY_PREFER: 144 vm_domainset_iter_prefer(di, domain); 145 break; 146 default: 147 panic("vm_domainset_iter_first: Unknown policy %d", 148 di->di_policy); 149 } 150 KASSERT(*domain < vm_ndomains, 151 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 152 } 153 154 static void 155 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 156 { 157 158 switch (di->di_policy) { 159 case DOMAINSET_POLICY_FIRSTTOUCH: 160 *domain = PCPU_GET(domain); 161 if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 162 /* 163 * Add an extra iteration because we will visit the 164 * current domain a second time in the rr iterator. 165 */ 166 di->di_n = di->di_domain->ds_cnt + 1; 167 break; 168 } 169 /* 170 * To prevent impossible allocations we convert an invalid 171 * first-touch to round-robin. 172 */ 173 /* FALLTHROUGH */ 174 case DOMAINSET_POLICY_ROUNDROBIN: 175 di->di_n = di->di_domain->ds_cnt; 176 vm_domainset_iter_rr(di, domain); 177 break; 178 case DOMAINSET_POLICY_PREFER: 179 *domain = di->di_domain->ds_prefer; 180 di->di_n = di->di_domain->ds_cnt; 181 break; 182 case DOMAINSET_POLICY_INTERLEAVE: 183 vm_domainset_iter_interleave(di, domain); 184 di->di_n = di->di_domain->ds_cnt; 185 break; 186 default: 187 panic("vm_domainset_iter_first: Unknown policy %d", 188 di->di_policy); 189 } 190 KASSERT(di->di_n > 0, 191 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 192 KASSERT(*domain < vm_ndomains, 193 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 194 } 195 196 void 197 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 198 vm_pindex_t pindex, int *domain, int *req) 199 { 200 struct domainset_ref *dr; 201 202 /* 203 * Object policy takes precedence over thread policy. The policies 204 * are immutable and unsynchronized. Updates can race but pointer 205 * loads are assumed to be atomic. 206 */ 207 if (obj != NULL && obj->domain.dr_policy != NULL) 208 dr = &obj->domain; 209 else 210 dr = &curthread->td_domain; 211 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); 212 di->di_flags = *req; 213 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 214 VM_ALLOC_NOWAIT; 215 vm_domainset_iter_first(di, domain); 216 if (vm_page_count_min_domain(*domain)) 217 vm_domainset_iter_page(di, obj, domain); 218 } 219 220 int 221 vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 222 int *domain) 223 { 224 225 /* If there are more domains to visit we run the iterator. */ 226 while (--di->di_n != 0) { 227 vm_domainset_iter_next(di, domain); 228 if (!di->di_minskip || !vm_page_count_min_domain(*domain)) 229 return (0); 230 } 231 232 /* If we skipped domains below min restart the search. */ 233 if (di->di_minskip) { 234 di->di_minskip = false; 235 vm_domainset_iter_first(di, domain); 236 return (0); 237 } 238 239 /* If we visited all domains and this was a NOWAIT we return error. */ 240 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 241 return (ENOMEM); 242 243 /* Wait for one of the domains to accumulate some free pages. */ 244 if (obj != NULL) 245 VM_OBJECT_WUNLOCK(obj); 246 vm_wait_doms(&di->di_domain->ds_mask, 0); 247 if (obj != NULL) 248 VM_OBJECT_WLOCK(obj); 249 if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0) 250 return (ENOMEM); 251 252 /* Restart the search. */ 253 vm_domainset_iter_first(di, domain); 254 255 return (0); 256 } 257 258 static void 259 _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, 260 int *flags) 261 { 262 263 di->di_flags = *flags; 264 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 265 vm_domainset_iter_first(di, domain); 266 if (vm_page_count_min_domain(*domain)) 267 vm_domainset_iter_policy(di, domain); 268 } 269 270 void 271 vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 272 struct domainset *ds, int *domain, int *flags) 273 { 274 275 vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); 276 _vm_domainset_iter_policy_init(di, domain, flags); 277 } 278 279 void 280 vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 281 struct domainset_ref *dr, int *domain, int *flags) 282 { 283 284 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); 285 _vm_domainset_iter_policy_init(di, domain, flags); 286 } 287 288 int 289 vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 290 { 291 292 /* If there are more domains to visit we run the iterator. */ 293 while (--di->di_n != 0) { 294 vm_domainset_iter_next(di, domain); 295 if (!di->di_minskip || !vm_page_count_min_domain(*domain)) 296 return (0); 297 } 298 299 /* If we skipped domains below min restart the search. */ 300 if (di->di_minskip) { 301 di->di_minskip = false; 302 vm_domainset_iter_first(di, domain); 303 return (0); 304 } 305 306 /* If we visited all domains and this was a NOWAIT we return error. */ 307 if ((di->di_flags & M_WAITOK) == 0) 308 return (ENOMEM); 309 310 /* Wait for one of the domains to accumulate some free pages. */ 311 vm_wait_doms(&di->di_domain->ds_mask, 0); 312 313 /* Restart the search. */ 314 vm_domainset_iter_first(di, domain); 315 316 return (0); 317 } 318 319 #else /* !NUMA */ 320 321 int 322 vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 323 int *domain) 324 { 325 326 return (EJUSTRETURN); 327 } 328 329 void 330 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 331 vm_pindex_t pindex, int *domain, int *flags) 332 { 333 334 *domain = 0; 335 } 336 337 int 338 vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 339 { 340 341 return (EJUSTRETURN); 342 } 343 344 void 345 vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 346 struct domainset *ds, int *domain, int *flags) 347 { 348 349 *domain = 0; 350 } 351 352 void 353 vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 354 struct domainset_ref *dr, int *domain, int *flags) 355 { 356 357 *domain = 0; 358 } 359 360 #endif /* NUMA */ 361