/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitset.h> 38 #include <sys/domainset.h> 39 #include <sys/proc.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/malloc.h> 43 #include <sys/vmmeter.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/vm_domainset.h> 48 #include <vm/vm_object.h> 49 #include <vm/vm_page.h> 50 #include <vm/vm_phys.h> 51 52 #ifdef NUMA 53 /* 54 * Iterators are written such that the first nowait pass has as short a 55 * codepath as possible to eliminate bloat from the allocator. It is 56 * assumed that most allocations are successful. 57 */ 58 59 static int vm_domainset_default_stride = 64; 60 61 /* 62 * Determine which policy is to be used for this allocation. 63 */ 64 static void 65 vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, 66 vm_pindex_t pindex) 67 { 68 struct domainset *domain; 69 70 /* 71 * object policy takes precedence over thread policy. The policies 72 * are immutable and unsynchronized. Updates can race but pointer 73 * loads are assumed to be atomic. 74 */ 75 if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { 76 di->di_domain = domain; 77 di->di_iter = &obj->domain.dr_iterator; 78 } else { 79 di->di_domain = curthread->td_domain.dr_policy; 80 di->di_iter = &curthread->td_domain.dr_iterator; 81 } 82 di->di_policy = di->di_domain->ds_policy; 83 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 84 if (vm_object_reserv(obj)) { 85 /* 86 * Color the pindex so we end up on the correct 87 * reservation boundary. 88 */ 89 pindex += obj->pg_color; 90 pindex >>= VM_LEVEL_0_ORDER; 91 } else 92 pindex /= vm_domainset_default_stride; 93 /* 94 * Offset pindex so the first page of each object does 95 * not end up in domain 0. 
96 */ 97 if (obj != NULL) 98 pindex += (((uintptr_t)obj) / sizeof(*obj)); 99 di->di_offset = pindex; 100 } 101 } 102 103 static void 104 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 105 { 106 107 *domain = di->di_domain->ds_order[ 108 ++(*di->di_iter) % di->di_domain->ds_cnt]; 109 } 110 111 static void 112 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 113 { 114 int d; 115 116 do { 117 d = di->di_domain->ds_order[ 118 ++(*di->di_iter) % di->di_domain->ds_cnt]; 119 } while (d == di->di_domain->ds_prefer); 120 *domain = d; 121 } 122 123 static void 124 vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 125 { 126 int d; 127 128 d = di->di_offset % di->di_domain->ds_cnt; 129 *di->di_iter = d; 130 *domain = di->di_domain->ds_order[d]; 131 } 132 133 static void 134 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 135 { 136 137 KASSERT(di->di_n > 0, 138 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 139 switch (di->di_policy) { 140 case DOMAINSET_POLICY_FIRSTTOUCH: 141 /* 142 * To prevent impossible allocations we convert an invalid 143 * first-touch to round-robin. 144 */ 145 /* FALLTHROUGH */ 146 case DOMAINSET_POLICY_INTERLEAVE: 147 /* FALLTHROUGH */ 148 case DOMAINSET_POLICY_ROUNDROBIN: 149 vm_domainset_iter_rr(di, domain); 150 break; 151 case DOMAINSET_POLICY_PREFER: 152 vm_domainset_iter_prefer(di, domain); 153 break; 154 default: 155 panic("vm_domainset_iter_first: Unknown policy %d", 156 di->di_policy); 157 } 158 KASSERT(*domain < vm_ndomains, 159 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 160 } 161 162 static void 163 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 164 { 165 166 switch (di->di_policy) { 167 case DOMAINSET_POLICY_FIRSTTOUCH: 168 *domain = PCPU_GET(domain); 169 if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) { 170 /* 171 * Add an extra iteration because we will visit the 172 * current domain a second time in the rr iterator. 
173 */ 174 di->di_n = di->di_domain->ds_cnt + 1; 175 break; 176 } 177 /* 178 * To prevent impossible allocations we convert an invalid 179 * first-touch to round-robin. 180 */ 181 /* FALLTHROUGH */ 182 case DOMAINSET_POLICY_ROUNDROBIN: 183 di->di_n = di->di_domain->ds_cnt; 184 vm_domainset_iter_rr(di, domain); 185 break; 186 case DOMAINSET_POLICY_PREFER: 187 *domain = di->di_domain->ds_prefer; 188 di->di_n = di->di_domain->ds_cnt; 189 break; 190 case DOMAINSET_POLICY_INTERLEAVE: 191 vm_domainset_iter_interleave(di, domain); 192 di->di_n = di->di_domain->ds_cnt; 193 break; 194 default: 195 panic("vm_domainset_iter_first: Unknown policy %d", 196 di->di_policy); 197 } 198 KASSERT(di->di_n > 0, 199 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 200 KASSERT(*domain < vm_ndomains, 201 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 202 } 203 204 void 205 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 206 vm_pindex_t pindex, int *domain, int *req) 207 { 208 209 vm_domainset_iter_init(di, obj, pindex); 210 di->di_flags = *req; 211 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 212 VM_ALLOC_NOWAIT; 213 vm_domainset_iter_first(di, domain); 214 } 215 216 int 217 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) 218 { 219 220 /* 221 * If we exhausted all options with NOWAIT and did a WAITFAIL it 222 * is time to return an error to the caller. 223 */ 224 if ((*req & VM_ALLOC_WAITFAIL) != 0) 225 return (ENOMEM); 226 227 /* If there are more domains to visit we run the iterator. */ 228 if (--di->di_n != 0) { 229 vm_domainset_iter_next(di, domain); 230 return (0); 231 } 232 233 /* If we visited all domains and this was a NOWAIT we return error. */ 234 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 235 return (ENOMEM); 236 237 /* 238 * We have visited all domains with non-blocking allocations, try 239 * from the beginning with a blocking allocation. 
240 */ 241 vm_domainset_iter_first(di, domain); 242 *req = di->di_flags; 243 244 return (0); 245 } 246 247 248 void 249 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 250 struct vm_object *obj, int *domain, int *flags) 251 { 252 253 vm_domainset_iter_init(di, obj, 0); 254 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) 255 di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; 256 di->di_flags = *flags; 257 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 258 vm_domainset_iter_first(di, domain); 259 } 260 261 int 262 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 263 { 264 265 /* If there are more domains to visit we run the iterator. */ 266 if (--di->di_n != 0) { 267 vm_domainset_iter_next(di, domain); 268 return (0); 269 } 270 271 /* If we visited all domains and this was a NOWAIT we return error. */ 272 if ((di->di_flags & M_WAITOK) == 0) 273 return (ENOMEM); 274 275 /* 276 * We have visited all domains with non-blocking allocations, try 277 * from the beginning with a blocking allocation. 278 */ 279 vm_domainset_iter_first(di, domain); 280 *flags = di->di_flags; 281 282 return (0); 283 } 284 285 #else /* !NUMA */ 286 int 287 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) 288 { 289 290 return (EJUSTRETURN); 291 } 292 293 void 294 vm_domainset_iter_page_init(struct vm_domainset_iter *di, 295 struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) 296 { 297 298 *domain = 0; 299 } 300 301 int 302 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) 303 { 304 305 return (EJUSTRETURN); 306 } 307 308 void 309 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, 310 struct vm_object *obj, int *domain, int *flags) 311 { 312 313 *domain = 0; 314 } 315 316 #endif 317