/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#ifdef NUMA
/*
 * Iterators are written such that the first nowait pass has as short a
 * codepath as possible to eliminate bloat from the allocator.  It is
 * assumed that most allocations are successful.
 */

static int vm_domainset_default_stride = 64;

/*
 * Determine which policy is to be used for this allocation.
 */
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex)
{
	struct domainset *domain;
	struct thread *td;

	/*
	 * Object policy takes precedence over thread policy.  The policies
	 * are immutable and unsynchronized.  Updates can race but pointer
	 * loads are assumed to be atomic.
	 */
	if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
		di->di_domain = domain;
		di->di_iter = &obj->domain.dr_iterator;
	} else {
		td = curthread;
		di->di_domain = td->td_domain.dr_policy;
		di->di_iter = &td->td_domain.dr_iterator;
	}
	di->di_policy = di->di_domain->ds_policy;
	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
		if (vm_object_reserv(obj)) {
			/*
			 * Color the pindex so we end up on the correct
			 * reservation boundary.
			 */
			pindex += obj->pg_color;
			pindex >>= VM_LEVEL_0_ORDER;
		} else
#endif
			pindex /= vm_domainset_default_stride;
		/*
		 * Offset pindex so the first page of each object does
		 * not end up in domain 0.
		 */
		if (obj != NULL)
			pindex += (((uintptr_t)obj) / sizeof(*obj));
		di->di_offset = pindex;
	}
	/* Skip domains below min on the first pass. */
	di->di_minskip = true;
}
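
/*
 * Illustrative example (not part of the original file): with the
 * interleave policy on a two-domain set and the default stride of 64
 * pages, and ignoring the per-object offset added above,
 *
 *	pindex   0..63  -> di_offset 0 -> ds_order[0 % 2] = ds_order[0]
 *	pindex  64..127 -> di_offset 1 -> ds_order[1 % 2] = ds_order[1]
 *	pindex 128..191 -> di_offset 2 -> ds_order[2 % 2] = ds_order[0]
 *
 * i.e., assuming 4KB pages, each 256KB run of an object maps to the
 * next domain in ds_order.  Reservation-backed objects shift by
 * VM_LEVEL_0_ORDER instead, so interleaving there occurs on
 * reservation boundaries.
 */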

static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{

	*domain = di->di_domain->ds_order[
	    ++(*di->di_iter) % di->di_domain->ds_cnt];
}

static void
vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
{
	int d;

	do {
		d = di->di_domain->ds_order[
		    ++(*di->di_iter) % di->di_domain->ds_cnt];
	} while (d == di->di_domain->ds_prefer);
	*domain = d;
}

static void
vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
{
	int d;

	d = di->di_offset % di->di_domain->ds_cnt;
	*di->di_iter = d;
	*domain = di->di_domain->ds_order[d];
}

static void
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{

	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_next: Invalid n %d", di->di_n));
	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_INTERLEAVE:
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		vm_domainset_iter_prefer(di, domain);
		break;
	default:
		panic("vm_domainset_iter_next: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
}

static void
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{

	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		*domain = PCPU_GET(domain);
		if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
			/*
			 * Add an extra iteration because we will visit the
			 * current domain a second time in the rr iterator.
			 */
			di->di_n = di->di_domain->ds_cnt + 1;
			break;
		}
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		di->di_n = di->di_domain->ds_cnt;
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		*domain = di->di_domain->ds_prefer;
		di->di_n = di->di_domain->ds_cnt;
		break;
	case DOMAINSET_POLICY_INTERLEAVE:
		vm_domainset_iter_interleave(di, domain);
		di->di_n = di->di_domain->ds_cnt;
		break;
	default:
		panic("vm_domainset_iter_first: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *req)
{

	vm_domainset_iter_init(di, obj, pindex);
	di->di_flags = *req;
	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
	    VM_ALLOC_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_page(di, domain, req);
}
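
/*
 * Illustrative sketch (not part of the original file): a caller is
 * expected to drive the iterator in a retry loop of roughly this shape,
 * attempting a non-blocking allocation in each candidate domain before
 * the iterator re-enables the blocking flags for a final pass.  Here
 * vm_page_alloc_domain() stands in for any per-domain page allocator:
 *
 *	struct vm_domainset_iter di;
 *	vm_page_t m;
 *	int domain;
 *
 *	vm_domainset_iter_page_init(&di, object, pindex, &domain, &req);
 *	do {
 *		m = vm_page_alloc_domain(object, pindex, domain, req);
 *		if (m != NULL)
 *			break;
 *	} while (vm_domainset_iter_page(&di, &domain, &req) == 0);
 */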

int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
{

	/*
	 * If we exhausted all options with NOWAIT and a WAITFAIL pass
	 * also failed, it is time to return an error to the caller.
	 */
	if ((*req & VM_ALLOC_WAITFAIL) != 0)
		return (ENOMEM);

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (!di->di_minskip || !vm_page_count_min_domain(*domain))
			return (0);
	}

	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
		return (ENOMEM);

	/*
	 * We have visited all domains with non-blocking allocations, try
	 * from the beginning with a blocking allocation.
	 */
	vm_domainset_iter_first(di, domain);
	*req = di->di_flags;

	return (0);
}

void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
    struct vm_object *obj, int *domain, int *flags)
{

	vm_domainset_iter_init(di, obj, 0);
	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE)
		di->di_policy = DOMAINSET_POLICY_ROUNDROBIN;
	di->di_flags = *flags;
	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
	vm_domainset_iter_first(di, domain);
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_malloc(di, domain, flags);
}

int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
{

	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (!di->di_minskip || !vm_page_count_min_domain(*domain))
			return (0);
	}

	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}

	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & M_WAITOK) == 0)
		return (ENOMEM);

	/*
	 * We have visited all domains with non-blocking allocations, try
	 * from the beginning with a blocking allocation.
	 */
	vm_domainset_iter_first(di, domain);
	*flags = di->di_flags;

	return (0);
}

#else /* !NUMA */

int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_page_init(struct vm_domainset_iter *di,
    struct vm_object *obj, vm_pindex_t pindex, int *domain, int *req)
{

	*domain = 0;
}

int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
{

	return (EJUSTRETURN);
}

void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
    struct vm_object *obj, int *domain, int *flags)
{

	*domain = 0;
}

#endif /* NUMA */
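
/*
 * Illustrative sketch (not part of the original file): the malloc
 * iterator is driven the same way as the page iterator.  A hypothetical
 * kmem_alloc_domain() stands in here for any per-domain kernel-memory
 * allocator:
 *
 *	struct vm_domainset_iter di;
 *	void *addr;
 *	int domain;
 *
 *	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 *	do {
 *		addr = kmem_alloc_domain(domain, size, flags);
 *		if (addr != NULL)
 *			break;
 *	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
 */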