1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitset.h> 38 #include <sys/domainset.h> 39 #include <sys/proc.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/malloc.h> 43 #include <sys/vmmeter.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/vm_domainset.h> 48 #include <vm/vm_object.h> 49 #include <vm/vm_page.h> 50 #include <vm/vm_phys.h> 51 52 #ifdef NUMA 53 /* 54 * Iterators are written such that the first nowait pass has as short a 55 * codepath as possible to eliminate bloat from the allocator. It is 56 * assumed that most allocations are successful. 57 */ 58 59 static int vm_domainset_default_stride = 64; 60 61 /* 62 * Determine which policy is to be used for this allocation. 63 */ 64 static void 65 vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66 vm_pindex_t pindex) 67 { 68 struct domainset *domain; 69 70 /* 71 * object policy takes precedence over thread policy. The policies 72 * are immutable and unsynchronized. Updates can race but pointer 73 * loads are assumed to be atomic. 74 */ 75 if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 76 di->di_domain = domain; 77 di->di_iter = &obj->domain.dr_iterator; 78 } else { 79 di->di_domain = curthread->td_domain.dr_policy; 80 di->di_iter = &curthread->td_domain.dr_iterator; 81 } 82 di->di_policy = di->di_domain->ds_policy; 83 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 84 #ifdef VM_LEVEL_0_ORDER 85 if (vm_object_reserv(obj)) { 86 /* 87 * Color the pindex so we end up on the correct 88 * reservation boundary. 89 */ 90 pindex += obj->pg_color; 91 pindex >>= VM_LEVEL_0_ORDER; 92 } 93 else 94 #endif 95 pindex /= vm_domainset_default_stride; 96 /* 97 * Offset pindex so the first page of each object does 98 * not end up in domain 0. 99 */ 100 if (obj != NULL) 101 pindex += (((uintptr_t)obj) / sizeof(*obj)); 102 di->di_offset = pindex; 103 } 104 } 105 106 static void 107 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 108 { 109 110 *domain = di->di_domain->ds_order[ 111 ++(*di->di_iter) % di->di_domain->ds_cnt]; 112 } 113 114 static void 115 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 116 { 117 int d; 118 119 do { 120 d = di->di_domain->ds_order[ 121 ++(*di->di_iter) % di->di_domain->ds_cnt]; 122 } while (d == di->di_domain->ds_prefer); 123 *domain = d; 124 } 125 126 static void 127 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 128 { 129 int d; 130 131 d = di->di_offset % di->di_domain->ds_cnt; 132 *di->di_iter = d; 133 *domain = di->di_domain->ds_order[d]; 134 } 135 136 static void 137 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 138 { 139 140 KASSERT(di->di_n > 0, 141 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 142 switch (di->di_policy) { 143 case DOMAINSET_POLICY_FIRSTTOUCH: 144 /* 145 * To prevent impossible allocations we convert an invalid 146 * first-touch to round-robin. 147 */ 148 /* FALLTHROUGH */ 149 case DOMAINSET_POLICY_INTERLEAVE: 150 /* FALLTHROUGH */ 151 case DOMAINSET_POLICY_ROUNDROBIN: 152 vm_domainset_iter_rr(di, domain); 153 break; 154 case DOMAINSET_POLICY_PREFER: 155 vm_domainset_iter_prefer(di, domain); 156 break; 157 default: 158 panic("vm_domainset_iter_first: Unknown policy %d", 159 di->di_policy); 160 } 161 KASSERT(*domain < vm_ndomains, 162 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 163 } 164 165 static void 166 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 167 { 168 169 switch (di->di_policy) { 170 case DOMAINSET_POLICY_FIRSTTOUCH: 171 *domain = PCPU_GET(domain); 172 if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 173 /* 174 * Add an extra iteration because we will visit the 175 * current domain a second time in the rr iterator. 176 */ 177 di->di_n = di->di_domain->ds_cnt + 1; 178 break; 179 } 180 /* 181 * To prevent impossible allocations we convert an invalid 182 * first-touch to round-robin. 183 */ 184 /* FALLTHROUGH */ 185 case DOMAINSET_POLICY_ROUNDROBIN: 186 di->di_n = di->di_domain->ds_cnt; 187 vm_domainset_iter_rr(di, domain); 188 break; 189 case DOMAINSET_POLICY_PREFER: 190 *domain = di->di_domain->ds_prefer; 191 di->di_n = di->di_domain->ds_cnt; 192 break; 193 case DOMAINSET_POLICY_INTERLEAVE: 194 vm_domainset_iter_interleave(di, domain); 195 di->di_n = di->di_domain->ds_cnt; 196 break; 197 default: 198 panic("vm_domainset_iter_first: Unknown policy %d", 199 di->di_policy); 200 } 201 KASSERT(di->di_n > 0, 202 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 203 KASSERT(*domain < vm_ndomains, 204 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 205 } 206 207 void 208 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 209 vm_pindex_t pindex, int *domain, int *req) 210 { 211 212 vm_domainset_iter_init(di, obj, pindex); 213 di->di_flags = *req; 214 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 215 VM_ALLOC_NOWAIT; 216 vm_domainset_iter_first(di, domain); 217 } 218 219 int 220 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 221 { 222 223 /* 224 * If we exhausted all options with NOWAIT and did a WAITFAIL it 225 * is time to return an error to the caller. 226 */ 227 if ((*req & VM_ALLOC_WAITFAIL) != 0) 228 return (ENOMEM); 229 230 /* If there are more domains to visit we run the iterator. */ 231 if (--di->di_n != 0) { 232 vm_domainset_iter_next(di, domain); 233 return (0); 234 } 235 236 /* If we visited all domains and this was a NOWAIT we return error. */ 237 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 238 return (ENOMEM); 239 240 /* 241 * We have visited all domains with non-blocking allocations, try 242 * from the beginning with a blocking allocation. 243 */ 244 vm_domainset_iter_first(di, domain); 245 *req = di->di_flags; 246 247 return (0); 248 } 249 250 251 void 252 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 253 struct vm_object *obj, int *domain, int *flags) 254 { 255 256 vm_domainset_iter_init(di, obj, 0); 257 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 258 di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 259 di->di_flags = *flags; 260 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 261 vm_domainset_iter_first(di, domain); 262 } 263 264 int 265 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 266 { 267 268 /* If there are more domains to visit we run the iterator. */ 269 if (--di->di_n != 0) { 270 vm_domainset_iter_next(di, domain); 271 return (0); 272 } 273 274 /* If we visited all domains and this was a NOWAIT we return error. */ 275 if ((di->di_flags & M_WAITOK) == 0) 276 return (ENOMEM); 277 278 /* 279 * We have visited all domains with non-blocking allocations, try 280 * from the beginning with a blocking allocation. 281 */ 282 vm_domainset_iter_first(di, domain); 283 *flags = di->di_flags; 284 285 return (0); 286 } 287 288 #else /* !NUMA */ 289 int 290 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 291 { 292 293 return (EJUSTRETURN); 294 } 295 296 void 297 vm_domainset_iter_page_init(struct vm_domainset_iter *di, 298 struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 299 { 300 301 *domain = 0; 302 } 303 304 int 305 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 306 { 307 308 return (EJUSTRETURN); 309 } 310 311 void 312 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 313 struct vm_object *obj, int *domain, int *flags) 314 { 315 316 *domain = 0; 317 } 318 319 #endif 320