1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst 4 * 5 * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 6 * Copyright (c) 2026 Tejun Heo <tj@kernel.org> 7 */ 8 #include <linux/cacheinfo.h> 9 10 #include "internal.h" 11 #include "cid.h" 12 13 /* 14 * cid tables. 15 * 16 * Pointers are published once on first enable and never revoked. The default 17 * mapping is populated before ops.init() runs; scx_bpf_cid_override() commits 18 * before it returns. As long as the BPF scheduler only uses the tables from 19 * those points onward, it sees a consistent view. 20 */ 21 s16 *scx_cid_to_cpu_tbl; 22 s16 *scx_cpu_to_cid_tbl; 23 struct scx_cid_topo *scx_cid_topo; 24 25 #define SCX_CID_TOPO_NEG (struct scx_cid_topo) { \ 26 .core_cid = -1, .core_idx = -1, .llc_cid = -1, .llc_idx = -1, \ 27 .node_cid = -1, .node_idx = -1, \ 28 } 29 30 /* 31 * Return @cpu's LLC shared_cpu_map. If cacheinfo isn't populated (offline or 32 * !present), record @cpu in @fallbacks and return its node mask instead - the 33 * worst that can happen is that the cpu's LLC becomes coarser than reality. 34 */ 35 static const struct cpumask *cpu_llc_mask(int cpu, struct cpumask *fallbacks) 36 { 37 struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); 38 39 if (!ci || !ci->info_list || !ci->num_leaves) { 40 cpumask_set_cpu(cpu, fallbacks); 41 return cpumask_of_node(cpu_to_node(cpu)); 42 } 43 return &ci->info_list[ci->num_leaves - 1].shared_cpu_map; 44 } 45 46 /* Allocate the cid tables once on first enable; never freed. */ 47 static s32 scx_cid_arrays_alloc(void) 48 { 49 u32 npossible = num_possible_cpus(); 50 s16 *cid_to_cpu, *cpu_to_cid; 51 struct scx_cid_topo *cid_topo; 52 53 if (scx_cid_to_cpu_tbl) 54 return 0; 55 56 cid_to_cpu = kzalloc_objs(*scx_cid_to_cpu_tbl, npossible, GFP_KERNEL); 57 cpu_to_cid = kzalloc_objs(*scx_cpu_to_cid_tbl, nr_cpu_ids, GFP_KERNEL); 58 cid_topo = kmalloc_objs(*scx_cid_topo, npossible, GFP_KERNEL); 59 60 if (!cid_to_cpu || !cpu_to_cid || !cid_topo) { 61 kfree(cid_to_cpu); 62 kfree(cpu_to_cid); 63 kfree(cid_topo); 64 return -ENOMEM; 65 } 66 67 WRITE_ONCE(scx_cid_to_cpu_tbl, cid_to_cpu); 68 WRITE_ONCE(scx_cpu_to_cid_tbl, cpu_to_cid); 69 WRITE_ONCE(scx_cid_topo, cid_topo); 70 return 0; 71 } 72 73 /** 74 * scx_cid_init - build the cid mapping 75 * @sch: the scx_sched being initialized; used as the scx_error() target 76 * 77 * See "Topological CPU IDs" in cid.h for the model. Walk online cpus by 78 * intersection at each level (parent_scratch & this_level_mask), which keeps 79 * containment correct by construction and naturally splits a physical LLC 80 * straddling two NUMA nodes into two LLC units. The caller must hold 81 * cpus_read_lock. 82 */ 83 s32 scx_cid_init(struct scx_sched *sch) 84 { 85 cpumask_var_t to_walk __free(free_cpumask_var) = CPUMASK_VAR_NULL; 86 cpumask_var_t node_scratch __free(free_cpumask_var) = CPUMASK_VAR_NULL; 87 cpumask_var_t llc_scratch __free(free_cpumask_var) = CPUMASK_VAR_NULL; 88 cpumask_var_t core_scratch __free(free_cpumask_var) = CPUMASK_VAR_NULL; 89 cpumask_var_t llc_fallback __free(free_cpumask_var) = CPUMASK_VAR_NULL; 90 cpumask_var_t online_no_topo __free(free_cpumask_var) = CPUMASK_VAR_NULL; 91 u32 next_cid = 0; 92 s32 next_node_idx = 0, next_llc_idx = 0, next_core_idx = 0; 93 s32 cpu, ret; 94 95 /* CMASK_MAX_WORDS in cid.bpf.h covers NR_CPUS up to 8192 */ 96 BUILD_BUG_ON(NR_CPUS > 8192); 97 98 lockdep_assert_cpus_held(); 99 100 ret = scx_cid_arrays_alloc(); 101 if (ret) 102 return ret; 103 104 if (!zalloc_cpumask_var(&to_walk, GFP_KERNEL) || 105 !zalloc_cpumask_var(&node_scratch, GFP_KERNEL) || 106 !zalloc_cpumask_var(&llc_scratch, GFP_KERNEL) || 107 !zalloc_cpumask_var(&core_scratch, GFP_KERNEL) || 108 !zalloc_cpumask_var(&llc_fallback, GFP_KERNEL) || 109 !zalloc_cpumask_var(&online_no_topo, GFP_KERNEL)) 110 return -ENOMEM; 111 112 /* -1 sentinels for sparse-possible cpu id holes (0 is a valid cid) */ 113 for (cpu = 0; cpu < nr_cpu_ids; cpu++) 114 scx_cpu_to_cid_tbl[cpu] = -1; 115 116 cpumask_copy(to_walk, cpu_online_mask); 117 118 while (!cpumask_empty(to_walk)) { 119 s32 next_cpu = cpumask_first(to_walk); 120 s32 nid = cpu_to_node(next_cpu); 121 s32 node_cid = next_cid; 122 s32 node_idx; 123 124 /* 125 * No NUMA info: skip and let the tail loop assign a no-topo 126 * cid. cpumask_of_node(-1) is undefined. 127 */ 128 if (nid < 0) { 129 cpumask_clear_cpu(next_cpu, to_walk); 130 continue; 131 } 132 133 node_idx = next_node_idx++; 134 135 /* node_scratch = to_walk & this node */ 136 cpumask_and(node_scratch, to_walk, cpumask_of_node(nid)); 137 if (WARN_ON_ONCE(!cpumask_test_cpu(next_cpu, node_scratch))) 138 return -EINVAL; 139 140 while (!cpumask_empty(node_scratch)) { 141 s32 ncpu = cpumask_first(node_scratch); 142 const struct cpumask *llc_mask = cpu_llc_mask(ncpu, llc_fallback); 143 s32 llc_cid = next_cid; 144 s32 llc_idx = next_llc_idx++; 145 146 /* llc_scratch = node_scratch & this llc */ 147 cpumask_and(llc_scratch, node_scratch, llc_mask); 148 if (WARN_ON_ONCE(!cpumask_test_cpu(ncpu, llc_scratch))) 149 return -EINVAL; 150 151 while (!cpumask_empty(llc_scratch)) { 152 s32 lcpu = cpumask_first(llc_scratch); 153 const struct cpumask *sib = topology_sibling_cpumask(lcpu); 154 s32 core_cid = next_cid; 155 s32 core_idx = next_core_idx++; 156 s32 ccpu; 157 158 /* core_scratch = llc_scratch & this core */ 159 cpumask_and(core_scratch, llc_scratch, sib); 160 if (WARN_ON_ONCE(!cpumask_test_cpu(lcpu, core_scratch))) 161 return -EINVAL; 162 163 for_each_cpu(ccpu, core_scratch) { 164 s32 cid = next_cid++; 165 166 scx_cid_to_cpu_tbl[cid] = ccpu; 167 scx_cpu_to_cid_tbl[ccpu] = cid; 168 scx_cid_topo[cid] = (struct scx_cid_topo){ 169 .core_cid = core_cid, 170 .core_idx = core_idx, 171 .llc_cid = llc_cid, 172 .llc_idx = llc_idx, 173 .node_cid = node_cid, 174 .node_idx = node_idx, 175 }; 176 177 cpumask_clear_cpu(ccpu, llc_scratch); 178 cpumask_clear_cpu(ccpu, node_scratch); 179 cpumask_clear_cpu(ccpu, to_walk); 180 } 181 } 182 } 183 } 184 185 /* 186 * No-topo section: any possible cpu without a cid - normally just the 187 * not-online ones. Collect any currently-online cpus that land here in 188 * @online_no_topo so we can warn about them at the end. 189 */ 190 for_each_cpu(cpu, cpu_possible_mask) { 191 s32 cid; 192 193 if (__scx_cpu_to_cid(cpu) != -1) 194 continue; 195 if (cpu_online(cpu)) 196 cpumask_set_cpu(cpu, online_no_topo); 197 198 cid = next_cid++; 199 scx_cid_to_cpu_tbl[cid] = cpu; 200 scx_cpu_to_cid_tbl[cpu] = cid; 201 scx_cid_topo[cid] = SCX_CID_TOPO_NEG; 202 } 203 204 if (!cpumask_empty(llc_fallback)) 205 pr_warn("scx_cid: cpus without cacheinfo, using node mask as llc: %*pbl\n", 206 cpumask_pr_args(llc_fallback)); 207 if (!cpumask_empty(online_no_topo)) 208 pr_warn("scx_cid: online cpus with no usable topology: %*pbl\n", 209 cpumask_pr_args(online_no_topo)); 210 211 return 0; 212 } 213 214 /** 215 * scx_cmask_clear - Zero every bit in @m's active range 216 * @m: cmask to clear 217 * 218 * Storage past the active range is left as is. 219 */ 220 void scx_cmask_clear(struct scx_cmask *m) 221 { 222 u32 nr_words; 223 224 if (!m->nr_cids) 225 return; 226 nr_words = (m->base + m->nr_cids - 1) / 64 - m->base / 64 + 1; 227 memset(m->bits, 0, nr_words * sizeof(u64)); 228 } 229 230 /** 231 * scx_cmask_fill - Set every bit in @m's active range 232 * @m: cmask to fill 233 * 234 * Counterpart to scx_cmask_clear(). Storage past the active range is left as is. 235 */ 236 void scx_cmask_fill(struct scx_cmask *m) 237 { 238 u32 nr_words, head_bits, tail_bits; 239 240 if (!m->nr_cids) 241 return; 242 nr_words = (m->base + m->nr_cids - 1) / 64 - m->base / 64 + 1; 243 memset(m->bits, 0xff, nr_words * sizeof(u64)); 244 245 /* clear word-0 bits below base */ 246 head_bits = m->base & 63; 247 if (head_bits) 248 m->bits[0] &= ~((1ULL << head_bits) - 1); 249 250 /* clear last-word bits at or past base + nr_cids */ 251 tail_bits = (m->base + m->nr_cids) & 63; 252 if (tail_bits) 253 m->bits[nr_words - 1] &= (1ULL << tail_bits) - 1; 254 } 255 256 /** 257 * scx_cpumask_to_cmask - Translate a kernel cpumask into a cmask 258 * @src: source cpumask 259 * @dst: cmask to write 260 * 261 * Clear @dst's active range and set the bit for each cid whose cpu is in 262 * @src and lies within that range. Out-of-range cids are silently ignored. 263 */ 264 void scx_cpumask_to_cmask(const struct cpumask *src, struct scx_cmask *dst) 265 { 266 s32 cpu; 267 268 scx_cmask_clear(dst); 269 for_each_cpu(cpu, src) { 270 s32 cid = __scx_cpu_to_cid(cpu); 271 272 if (cid >= 0) 273 __scx_cmask_set(cid, dst); 274 } 275 } 276 277 __bpf_kfunc_start_defs(); 278 279 /** 280 * scx_bpf_cid_override - Install an explicit cpu->cid mapping 281 * @cpu_to_cid: array of nr_cpu_ids s32 entries (cid for each cpu) 282 * @cpu_to_cid__sz: must be nr_cpu_ids * sizeof(s32) bytes 283 * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 284 * 285 * May only be called from ops.init() of the root scheduler. Replace the 286 * topology-probed cid mapping with the caller-provided one. Each possible cpu 287 * must map to a unique cid in [0, num_possible_cpus()). Topo info is cleared. 288 * On invalid input, trigger scx_error() to abort the scheduler. 289 */ 290 __bpf_kfunc void scx_bpf_cid_override(const s32 *cpu_to_cid, u32 cpu_to_cid__sz, 291 const struct bpf_prog_aux *aux) 292 { 293 cpumask_var_t seen __free(free_cpumask_var) = CPUMASK_VAR_NULL; 294 struct scx_sched *sch; 295 bool alloced; 296 s32 cpu, cid; 297 298 /* GFP_KERNEL alloc must happen before the rcu read section */ 299 alloced = zalloc_cpumask_var(&seen, GFP_KERNEL); 300 301 guard(rcu)(); 302 303 sch = scx_prog_sched(aux); 304 if (unlikely(!sch)) 305 return; 306 307 if (!alloced) { 308 scx_error(sch, "scx_bpf_cid_override: failed to allocate cpumask"); 309 return; 310 } 311 312 if (scx_parent(sch)) { 313 scx_error(sch, "scx_bpf_cid_override() only allowed from root sched"); 314 return; 315 } 316 317 if (cpu_to_cid__sz != nr_cpu_ids * sizeof(s32)) { 318 scx_error(sch, "scx_bpf_cid_override: expected %zu bytes, got %u", 319 nr_cpu_ids * sizeof(s32), cpu_to_cid__sz); 320 return; 321 } 322 323 for_each_possible_cpu(cpu) { 324 s32 c = cpu_to_cid[cpu]; 325 326 if (!cid_valid(sch, c)) 327 return; 328 if (cpumask_test_and_set_cpu(c, seen)) { 329 scx_error(sch, "cid %d assigned to multiple cpus", c); 330 return; 331 } 332 scx_cpu_to_cid_tbl[cpu] = c; 333 scx_cid_to_cpu_tbl[c] = cpu; 334 } 335 336 /* Invalidate stale topo info - the override carries no topology. */ 337 for (cid = 0; cid < num_possible_cpus(); cid++) 338 scx_cid_topo[cid] = SCX_CID_TOPO_NEG; 339 } 340 341 /** 342 * scx_bpf_cid_to_cpu - Return the raw CPU id for @cid 343 * @cid: cid to look up 344 * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 345 * 346 * Return the raw CPU id for @cid. Trigger scx_error() and return -EINVAL if 347 * @cid is invalid. The cid<->cpu mapping is static for the lifetime of the 348 * loaded scheduler, so the BPF side can cache the result to avoid repeated 349 * kfunc invocations. 350 */ 351 __bpf_kfunc s32 scx_bpf_cid_to_cpu(s32 cid, const struct bpf_prog_aux *aux) 352 { 353 struct scx_sched *sch; 354 355 guard(rcu)(); 356 357 sch = scx_prog_sched(aux); 358 if (unlikely(!sch)) 359 return -EINVAL; 360 return scx_cid_to_cpu(sch, cid); 361 } 362 363 /** 364 * scx_bpf_cpu_to_cid - Return the cid for @cpu 365 * @cpu: cpu to look up 366 * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 367 * 368 * Return the cid for @cpu. Trigger scx_error() and return -EINVAL if @cpu is 369 * invalid. The cid<->cpu mapping is static for the lifetime of the loaded 370 * scheduler, so the BPF side can cache the result to avoid repeated kfunc 371 * invocations. 372 */ 373 __bpf_kfunc s32 scx_bpf_cpu_to_cid(s32 cpu, const struct bpf_prog_aux *aux) 374 { 375 struct scx_sched *sch; 376 377 guard(rcu)(); 378 379 sch = scx_prog_sched(aux); 380 if (unlikely(!sch)) 381 return -EINVAL; 382 return scx_cpu_to_cid(sch, cpu); 383 } 384 385 /* 386 * Set ops on cmasks. cmask_walk_op2() shares one walk across mutating 387 * (and/or/copy/andnot) and predicate (subset/intersects) two-cmask forms; 388 * cmask_walk_op1() does the same shape over a single cmask range. Every public 389 * entry passes a compile-time-constant @op; cmask_walk_op{1,2}() and 390 * cmask_word_op{1,2}() are __always_inline so the inner switch collapses to the 391 * selected op and cmask_op2_is_pred() folds the predicate early-exit out of 392 * mutating ops. 393 * 394 * Two-cmask ops only touch @dst bits inside the intersection of the two ranges; 395 * bits outside stay untouched. In particular, scx_cmask_copy() does NOT zero 396 * @dst bits that lie outside @src's range. 397 * 398 * The _RACY variants are otherwise identical to their non-racy counterpart but 399 * read @src word-by-word via data_race(). Memory ordering with concurrent 400 * writers is the caller's responsibility. 401 */ 402 enum cmask_op2 { 403 /* mutating */ 404 CMASK_OP2_AND, 405 CMASK_OP2_OR, 406 CMASK_OP2_OR_RACY, 407 CMASK_OP2_COPY, 408 CMASK_OP2_COPY_RACY, 409 CMASK_OP2_ANDNOT, 410 /* predicates - short-circuit when the per-word result is true */ 411 CMASK_OP2_SUBSET, 412 CMASK_OP2_INTERSECTS, 413 }; 414 415 static __always_inline bool cmask_op2_is_pred(const enum cmask_op2 op) 416 { 417 return op == CMASK_OP2_SUBSET || op == CMASK_OP2_INTERSECTS; 418 } 419 420 static __always_inline bool cmask_word_op2(u64 *av, const u64 *bp, u64 mask, 421 const enum cmask_op2 op) 422 { 423 switch (op) { 424 case CMASK_OP2_AND: 425 *av &= ~mask | *bp; 426 return false; 427 case CMASK_OP2_OR: 428 *av |= *bp & mask; 429 return false; 430 case CMASK_OP2_OR_RACY: 431 *av |= data_race(*bp) & mask; 432 return false; 433 case CMASK_OP2_COPY: 434 *av = (*av & ~mask) | (*bp & mask); 435 return false; 436 case CMASK_OP2_COPY_RACY: 437 *av = (*av & ~mask) | (data_race(*bp) & mask); 438 return false; 439 case CMASK_OP2_ANDNOT: 440 *av &= ~(*bp & mask); 441 return false; 442 case CMASK_OP2_SUBSET: 443 /* stop on the first bit in @sub not set in @super */ 444 return (*bp & ~*av) & mask; 445 case CMASK_OP2_INTERSECTS: 446 return (*av & *bp) & mask; 447 } 448 unreachable(); 449 } 450 451 /* 452 * Walk the intersection of [@a_base, @a_base + @a_nr_cids) with [@b_base, 453 * @b_base + @b_nr_cids) word by word, applying @op. Mutating ops walk all words 454 * and return false; predicates return true on the first word whose per-word 455 * test is true. Empty intersection returns false (matches "no bits to consider" 456 * for both mutate and predicate). 457 * 458 * Base/nr_cids are taken as parameters so callers with snapshotted bounds can 459 * drive the walk with values independent of the cmask's header. 460 */ 461 static __always_inline bool cmask_walk_op2(u64 *a_bits, u32 a_base, u32 a_nr_cids, 462 const u64 *b_bits, u32 b_base, u32 b_nr_cids, 463 const enum cmask_op2 op) 464 { 465 u32 lo = max(a_base, b_base); 466 u32 hi = min(a_base + a_nr_cids, b_base + b_nr_cids); 467 u32 a_word_off = a_base / 64; 468 u32 b_word_off = b_base / 64; 469 u32 lo_word = lo / 64; 470 u32 hi_word = (hi - 1) / 64; 471 u64 head_mask = GENMASK_U64(63, lo & 63); 472 u64 tail_mask = GENMASK_U64((hi - 1) & 63, 0); 473 u32 w; 474 475 if (lo >= hi) 476 return false; 477 478 if (lo_word == hi_word) 479 return cmask_word_op2(&a_bits[lo_word - a_word_off], 480 &b_bits[lo_word - b_word_off], 481 head_mask & tail_mask, op); 482 483 if (cmask_word_op2(&a_bits[lo_word - a_word_off], 484 &b_bits[lo_word - b_word_off], head_mask, op) && 485 cmask_op2_is_pred(op)) 486 return true; 487 488 for (w = lo_word + 1; w < hi_word; w++) 489 if (cmask_word_op2(&a_bits[w - a_word_off], 490 &b_bits[w - b_word_off], ~0ULL, op) && 491 cmask_op2_is_pred(op)) 492 return true; 493 494 return cmask_word_op2(&a_bits[hi_word - a_word_off], 495 &b_bits[hi_word - b_word_off], tail_mask, op); 496 } 497 498 enum cmask_op1 { 499 CMASK_OP1_ANY_SET, 500 }; 501 502 static __always_inline bool cmask_word_op1(const u64 *ap, u64 mask, 503 const enum cmask_op1 op) 504 { 505 switch (op) { 506 case CMASK_OP1_ANY_SET: 507 return *ap & mask; 508 } 509 unreachable(); 510 } 511 512 /* 513 * Walk [@a_base, @a_base + @a_nr_cids) of @a_bits word by word, applying @op. 514 * Returns true on the first word whose per-word test is true; returns false if 515 * no word matches or the range is empty. All current op1s short-circuit on 516 * per-word true; if a non-predicate op1 lands here, add a cmask_op1_is_pred() 517 * guard analogous to cmask_op2_is_pred(). 518 */ 519 static __always_inline bool cmask_walk_op1(const u64 *a_bits, u32 a_base, 520 u32 a_nr_cids, 521 const enum cmask_op1 op) 522 { 523 u32 lo = a_base; 524 u32 hi = a_base + a_nr_cids; 525 u32 a_word_off = a_base / 64; 526 u32 lo_word = lo / 64; 527 u32 hi_word = (hi - 1) / 64; 528 u64 head_mask = GENMASK_U64(63, lo & 63); 529 u64 tail_mask = GENMASK_U64((hi - 1) & 63, 0); 530 u32 w; 531 532 if (lo >= hi) 533 return false; 534 535 if (lo_word == hi_word) 536 return cmask_word_op1(&a_bits[lo_word - a_word_off], 537 head_mask & tail_mask, op); 538 539 if (cmask_word_op1(&a_bits[lo_word - a_word_off], head_mask, op)) 540 return true; 541 for (w = lo_word + 1; w < hi_word; w++) 542 if (cmask_word_op1(&a_bits[w - a_word_off], ~0ULL, op)) 543 return true; 544 return cmask_word_op1(&a_bits[hi_word - a_word_off], tail_mask, op); 545 } 546 547 void scx_cmask_and(struct scx_cmask *dst, const struct scx_cmask *src) 548 { 549 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 550 src->bits, src->base, src->nr_cids, CMASK_OP2_AND); 551 } 552 553 void scx_cmask_or(struct scx_cmask *dst, const struct scx_cmask *src) 554 { 555 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 556 src->bits, src->base, src->nr_cids, CMASK_OP2_OR); 557 } 558 559 /** 560 * scx_cmask_or_racy - OR @src into @dst, reading @src without locking 561 * 562 * @src is read word-by-word through data_race(). Same per-bit independence 563 * rationale as scx_cmask_copy_racy(). Memory ordering with writers is the 564 * caller's responsibility. 565 */ 566 void scx_cmask_or_racy(struct scx_cmask *dst, const struct scx_cmask *src) 567 { 568 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 569 src->bits, src->base, src->nr_cids, CMASK_OP2_OR_RACY); 570 } 571 572 void scx_cmask_copy(struct scx_cmask *dst, const struct scx_cmask *src) 573 { 574 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 575 src->bits, src->base, src->nr_cids, CMASK_OP2_COPY); 576 } 577 578 /** 579 * scx_cmask_copy_racy - Snapshot @src into @dst without locking 580 * 581 * @src is read word-by-word through data_race(). Head/tail masking matches 582 * scx_cmask_copy(). Each bit in a cmask is independent, so partial updates 583 * just leave some bits fresher than others. Memory ordering with writers is 584 * the caller's responsibility. 585 */ 586 void scx_cmask_copy_racy(struct scx_cmask *dst, const struct scx_cmask *src) 587 { 588 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 589 src->bits, src->base, src->nr_cids, CMASK_OP2_COPY_RACY); 590 } 591 592 void scx_cmask_andnot(struct scx_cmask *dst, const struct scx_cmask *src) 593 { 594 cmask_walk_op2(dst->bits, dst->base, dst->nr_cids, 595 src->bits, src->base, src->nr_cids, CMASK_OP2_ANDNOT); 596 } 597 598 /* 599 * Return true if @cm has any bit set in [@lo, @hi). Caller must ensure 600 * [@lo, @hi) is contained in @cm's range. 601 */ 602 static bool cmask_any_set_in_range(const struct scx_cmask *cm, u32 lo, u32 hi) 603 { 604 if (lo >= hi) 605 return false; 606 return cmask_walk_op1(&cm->bits[lo / 64 - cm->base / 64], lo, hi - lo, 607 CMASK_OP1_ANY_SET); 608 } 609 610 /** 611 * scx_cmask_subset - test whether @sub is a subset of @super 612 * @sub: cmask to test 613 * @super: cmask to test against 614 * 615 * Return true iff every set bit of @sub is also set in @super. 616 */ 617 bool scx_cmask_subset(const struct scx_cmask *sub, const struct scx_cmask *super) 618 { 619 u32 super_end = super->base + super->nr_cids; 620 u32 sub_end = sub->base + sub->nr_cids; 621 622 /* 623 * Set bits in @sub outside @super's range can't be in @super, so any 624 * such bit means not a subset. The walk below only visits words 625 * common to both ranges, so these need a separate scan. 626 */ 627 if (sub->base < super->base && 628 cmask_any_set_in_range(sub, sub->base, min(super->base, sub_end))) 629 return false; 630 if (sub_end > super_end && 631 cmask_any_set_in_range(sub, max(sub->base, super_end), sub_end)) 632 return false; 633 634 return !cmask_walk_op2((u64 *)super->bits, super->base, super->nr_cids, 635 sub->bits, sub->base, sub->nr_cids, CMASK_OP2_SUBSET); 636 } 637 638 bool scx_cmask_intersects(const struct scx_cmask *a, const struct scx_cmask *b) 639 { 640 return cmask_walk_op2((u64 *)a->bits, a->base, a->nr_cids, 641 b->bits, b->base, b->nr_cids, CMASK_OP2_INTERSECTS); 642 } 643 644 /** 645 * scx_cmask_empty - Test whether @m has no bits set 646 * @m: cmask to test 647 * 648 * Return true iff @m's active range has no bits set. 649 */ 650 bool scx_cmask_empty(const struct scx_cmask *m) 651 { 652 return !cmask_any_set_in_range(m, m->base, m->base + m->nr_cids); 653 } 654 655 /** 656 * scx_bpf_cid_topo - Copy out per-cid topology info 657 * @cid: cid to look up 658 * @out__uninit: where to copy the topology info; fully written by this call 659 * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 660 * 661 * Fill @out__uninit with the topology info for @cid. Trigger scx_error() if 662 * @cid is out of range. If @cid is valid but in the no-topo section, all fields 663 * are set to -1. 664 */ 665 __bpf_kfunc void scx_bpf_cid_topo(s32 cid, struct scx_cid_topo *out__uninit, 666 const struct bpf_prog_aux *aux) 667 { 668 struct scx_sched *sch; 669 670 guard(rcu)(); 671 672 sch = scx_prog_sched(aux); 673 if (unlikely(!sch) || !cid_valid(sch, cid)) { 674 *out__uninit = SCX_CID_TOPO_NEG; 675 return; 676 } 677 678 *out__uninit = READ_ONCE(scx_cid_topo)[cid]; 679 } 680 681 __bpf_kfunc_end_defs(); 682 683 BTF_KFUNCS_START(scx_kfunc_ids_init) 684 BTF_ID_FLAGS(func, scx_bpf_cid_override, KF_IMPLICIT_ARGS | KF_SLEEPABLE) 685 BTF_KFUNCS_END(scx_kfunc_ids_init) 686 687 static const struct btf_kfunc_id_set scx_kfunc_set_init = { 688 .owner = THIS_MODULE, 689 .set = &scx_kfunc_ids_init, 690 .filter = scx_kfunc_context_filter, 691 }; 692 693 BTF_KFUNCS_START(scx_kfunc_ids_cid) 694 BTF_ID_FLAGS(func, scx_bpf_cid_to_cpu, KF_IMPLICIT_ARGS) 695 BTF_ID_FLAGS(func, scx_bpf_cpu_to_cid, KF_IMPLICIT_ARGS) 696 BTF_ID_FLAGS(func, scx_bpf_cid_topo, KF_IMPLICIT_ARGS) 697 BTF_KFUNCS_END(scx_kfunc_ids_cid) 698 699 static const struct btf_kfunc_id_set scx_kfunc_set_cid = { 700 .owner = THIS_MODULE, 701 .set = &scx_kfunc_ids_cid, 702 }; 703 704 int scx_cid_kfunc_init(void) 705 { 706 return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_init) ?: 707 register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cid) ?: 708 register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_cid) ?: 709 register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_cid); 710 } 711