1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * BPF-side helpers for cids and cmasks. See kernel/sched/ext/cid.h for the 4 * authoritative layout and semantics. The BPF-side helpers use the cmask_* 5 * naming (no scx_ prefix); cmask is the SCX bitmap type so the prefix is 6 * redundant in BPF code. Atomics use __sync_val_compare_and_swap and every 7 * helper is inline (no .c counterpart). 8 * 9 * Included by scx/common.bpf.h; don't include directly. 10 * 11 * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 12 * Copyright (c) 2026 Tejun Heo <tj@kernel.org> 13 */ 14 #ifndef __SCX_CID_BPF_H 15 #define __SCX_CID_BPF_H 16 17 #include "bpf_arena_common.bpf.h" 18 19 #ifndef BIT_U64 20 #define BIT_U64(nr) (1ULL << (nr)) 21 #endif 22 #ifndef GENMASK_U64 23 #define GENMASK_U64(h, l) ((~0ULL << (l)) & (~0ULL >> (63 - (h)))) 24 #endif 25 26 /* 27 * Storage cap for bounded loops over bits[]. Sized to cover NR_CPUS=8192 with 28 * one extra word for head-misalignment. Increase if deployment targets larger 29 * NR_CPUS. 30 */ 31 #ifndef CMASK_MAX_WORDS 32 #define CMASK_MAX_WORDS 129 33 #endif 34 35 /* 36 * Mirrors SCX_CMASK_NR_WORDS in kernel/sched/ext/types.h. The u64 cast keeps 37 * the +63 from wrapping when @nr_cids is near U32_MAX, so cmask_reframe() 38 * bounds-checking the result against alloc_words catches the overflow instead 39 * of seeing a small value. 40 */ 41 #define CMASK_NR_WORDS(nr_cids) ((u32)(((u64)(nr_cids) + 63) / 64 + 1)) 42 43 static __always_inline bool __cmask_contains(u32 cid, const struct scx_cmask __arena *m) 44 { 45 return cid >= m->base && cid < m->base + m->nr_cids; 46 } 47 48 static __always_inline u64 __arena *__cmask_word(u32 cid, const struct scx_cmask __arena *m) 49 { 50 return (u64 __arena *)&m->bits[cid / 64 - m->base / 64]; 51 } 52 53 /** 54 * __cmask_init - Initialize @m with explicit storage capacity 55 * @m: cmask to initialize 56 * @base: first cid of the active range 57 * @nr_cids: number of cids in the active range 58 * @alloc_cids: storage capacity in cids, at least @nr_cids 59 * 60 * Use when storage is sized larger than the initial active range. All of 61 * bits[] is zeroed. 62 */ 63 static __always_inline void __cmask_init(struct scx_cmask __arena *m, u32 base, 64 u32 nr_cids, u32 alloc_cids) 65 { 66 u32 alloc_words, i; 67 68 if (unlikely(nr_cids > alloc_cids)) { 69 scx_bpf_error("__cmask_init: nr_cids=%u exceeds alloc_cids=%u", 70 nr_cids, alloc_cids); 71 return; 72 } 73 alloc_words = CMASK_NR_WORDS(alloc_cids); 74 75 m->base = base; 76 m->nr_cids = nr_cids; 77 m->alloc_words = alloc_words; 78 79 bpf_for(i, 0, CMASK_MAX_WORDS) { 80 if (i >= alloc_words) 81 break; 82 m->bits[i] = 0; 83 } 84 } 85 86 /** 87 * cmask_init - Initialize @m on tight storage 88 * @m: cmask to initialize 89 * @base: first cid of the active range 90 * @nr_cids: number of cids in the active range 91 * 92 * All of bits[] is zeroed. 93 */ 94 static __always_inline void cmask_init(struct scx_cmask __arena *m, u32 base, u32 nr_cids) 95 { 96 __cmask_init(m, base, nr_cids, nr_cids); 97 } 98 99 /** 100 * cmask_reframe - Reshape @m's active range without resizing storage 101 * @m: cmask to reframe 102 * @base: new active range base 103 * @nr_cids: new active range length, must fit within @m->alloc_words 104 * 105 * Body bits within the new range become garbage - only the head and tail 106 * words are zeroed to keep the padding invariant. 107 */ 108 static __always_inline void cmask_reframe(struct scx_cmask __arena *m, u32 base, u32 nr_cids) 109 { 110 if (CMASK_NR_WORDS(nr_cids) > m->alloc_words) { 111 scx_bpf_error("cmask_reframe: nr_cids=%u exceeds alloc_words=%u", 112 nr_cids, m->alloc_words); 113 return; 114 } 115 if (nr_cids) { 116 u32 last_word = ((base & 63) + nr_cids - 1) / 64; 117 118 m->bits[0] = 0; 119 m->bits[last_word] = 0; 120 } 121 m->base = base; 122 m->nr_cids = nr_cids; 123 } 124 125 static __always_inline bool cmask_test(u32 cid, const struct scx_cmask __arena *m) 126 { 127 if (!__cmask_contains(cid, m)) 128 return false; 129 return *__cmask_word(cid, m) & BIT_U64(cid & 63); 130 } 131 132 /* 133 * x86 BPF JIT rejects BPF_OR | BPF_FETCH and BPF_AND | BPF_FETCH on arena 134 * pointers (see bpf_jit_supports_insn() in arch/x86/net/bpf_jit_comp.c). Only 135 * BPF_CMPXCHG / BPF_XCHG / BPF_ADD with FETCH are allowed. Implement 136 * test_and_{set,clear} and the atomic set/clear via a cmpxchg loop. 137 * 138 * CMASK_CAS_TRIES is sized so exhausting it means seconds of real spinning 139 * on one word - past any plausible contention. Abort hard. 140 */ 141 #define CMASK_CAS_TRIES (1U << 23) 142 143 static __always_inline void cmask_set(u32 cid, struct scx_cmask __arena *m) 144 { 145 u64 __arena *w; 146 u64 bit, old, new; 147 u32 i; 148 149 if (!__cmask_contains(cid, m)) 150 return; 151 w = __cmask_word(cid, m); 152 bit = BIT_U64(cid & 63); 153 bpf_for(i, 0, CMASK_CAS_TRIES) { 154 old = *w; 155 if (old & bit) 156 return; 157 new = old | bit; 158 if (__sync_val_compare_and_swap(w, old, new) == old) 159 return; 160 } 161 scx_bpf_error("cmask_set CAS exhausted at cid %u", cid); 162 } 163 164 static __always_inline void cmask_clear(u32 cid, struct scx_cmask __arena *m) 165 { 166 u64 __arena *w; 167 u64 bit, old, new; 168 u32 i; 169 170 if (!__cmask_contains(cid, m)) 171 return; 172 w = __cmask_word(cid, m); 173 bit = BIT_U64(cid & 63); 174 bpf_for(i, 0, CMASK_CAS_TRIES) { 175 old = *w; 176 if (!(old & bit)) 177 return; 178 new = old & ~bit; 179 if (__sync_val_compare_and_swap(w, old, new) == old) 180 return; 181 } 182 scx_bpf_error("cmask_clear CAS exhausted at cid %u", cid); 183 } 184 185 static __always_inline bool cmask_test_and_set(u32 cid, struct scx_cmask __arena *m) 186 { 187 u64 __arena *w; 188 u64 bit, old, new; 189 u32 i; 190 191 if (!__cmask_contains(cid, m)) 192 return false; 193 w = __cmask_word(cid, m); 194 bit = BIT_U64(cid & 63); 195 bpf_for(i, 0, CMASK_CAS_TRIES) { 196 old = *w; 197 if (old & bit) 198 return true; 199 new = old | bit; 200 if (__sync_val_compare_and_swap(w, old, new) == old) 201 return false; 202 } 203 scx_bpf_error("cmask_test_and_set CAS exhausted at cid %u", cid); 204 return false; 205 } 206 207 static __always_inline bool cmask_test_and_clear(u32 cid, struct scx_cmask __arena *m) 208 { 209 u64 __arena *w; 210 u64 bit, old, new; 211 u32 i; 212 213 if (!__cmask_contains(cid, m)) 214 return false; 215 w = __cmask_word(cid, m); 216 bit = BIT_U64(cid & 63); 217 bpf_for(i, 0, CMASK_CAS_TRIES) { 218 old = *w; 219 if (!(old & bit)) 220 return false; 221 new = old & ~bit; 222 if (__sync_val_compare_and_swap(w, old, new) == old) 223 return true; 224 } 225 scx_bpf_error("cmask_test_and_clear CAS exhausted at cid %u", cid); 226 return false; 227 } 228 229 static __always_inline void __cmask_set(u32 cid, struct scx_cmask __arena *m) 230 { 231 if (!__cmask_contains(cid, m)) 232 return; 233 *__cmask_word(cid, m) |= BIT_U64(cid & 63); 234 } 235 236 static __always_inline void __cmask_clear(u32 cid, struct scx_cmask __arena *m) 237 { 238 if (!__cmask_contains(cid, m)) 239 return; 240 *__cmask_word(cid, m) &= ~BIT_U64(cid & 63); 241 } 242 243 static __always_inline bool __cmask_test_and_set(u32 cid, struct scx_cmask __arena *m) 244 { 245 u64 bit = BIT_U64(cid & 63); 246 u64 __arena *w; 247 u64 prev; 248 249 if (!__cmask_contains(cid, m)) 250 return false; 251 w = __cmask_word(cid, m); 252 prev = *w & bit; 253 *w |= bit; 254 return prev; 255 } 256 257 static __always_inline bool __cmask_test_and_clear(u32 cid, struct scx_cmask __arena *m) 258 { 259 u64 bit = BIT_U64(cid & 63); 260 u64 __arena *w; 261 u64 prev; 262 263 if (!__cmask_contains(cid, m)) 264 return false; 265 w = __cmask_word(cid, m); 266 prev = *w & bit; 267 *w &= ~bit; 268 return prev; 269 } 270 271 static __always_inline void cmask_zero(struct scx_cmask __arena *m) 272 { 273 u32 nr_words = CMASK_NR_WORDS(m->nr_cids), i; 274 275 bpf_for(i, 0, CMASK_MAX_WORDS) { 276 if (i >= nr_words) 277 break; 278 m->bits[i] = 0; 279 } 280 } 281 282 /* 283 * BPF_-prefixed to avoid colliding with the kernel's anonymous CMASK_OP_* 284 * enum in ext/cid.c, which is exported via BTF and reachable through 285 * vmlinux.h. 286 */ 287 enum { 288 BPF_CMASK_OP_AND, 289 BPF_CMASK_OP_OR, 290 BPF_CMASK_OP_COPY, 291 BPF_CMASK_OP_ANDNOT, 292 }; 293 294 static __always_inline void cmask_op_word(struct scx_cmask __arena *dst, 295 const struct scx_cmask __arena *src, 296 u32 di, u32 si, u64 mask, int op) 297 { 298 u64 dv = dst->bits[di]; 299 u64 sv = src->bits[si]; 300 u64 rv; 301 302 if (op == BPF_CMASK_OP_AND) 303 rv = dv & sv; 304 else if (op == BPF_CMASK_OP_OR) 305 rv = dv | sv; 306 else if (op == BPF_CMASK_OP_ANDNOT) 307 rv = dv & ~sv; 308 else 309 rv = sv; 310 311 dst->bits[di] = (dv & ~mask) | (rv & mask); 312 } 313 314 static __always_inline void cmask_op(struct scx_cmask __arena *dst, 315 const struct scx_cmask __arena *src, int op) 316 { 317 u32 d_end = dst->base + dst->nr_cids; 318 u32 s_end = src->base + src->nr_cids; 319 u32 lo = dst->base > src->base ? dst->base : src->base; 320 u32 hi = d_end < s_end ? d_end : s_end; 321 u32 d_base = dst->base / 64; 322 u32 s_base = src->base / 64; 323 u32 lo_word, hi_word, i; 324 u64 head_mask, tail_mask; 325 326 if (lo >= hi) 327 return; 328 329 lo_word = lo / 64; 330 hi_word = (hi - 1) / 64; 331 head_mask = GENMASK_U64(63, lo & 63); 332 tail_mask = GENMASK_U64((hi - 1) & 63, 0); 333 334 bpf_for(i, 0, CMASK_MAX_WORDS) { 335 u32 w = lo_word + i; 336 u64 m; 337 338 if (w > hi_word) 339 break; 340 341 m = GENMASK_U64(63, 0); 342 if (w == lo_word) 343 m &= head_mask; 344 if (w == hi_word) 345 m &= tail_mask; 346 347 cmask_op_word(dst, src, w - d_base, w - s_base, m, op); 348 } 349 } 350 351 /* 352 * cmask_and/or/copy only modify @dst bits that lie in the intersection of 353 * [@dst->base, @dst->base + @dst->nr_cids) and [@src->base, 354 * @src->base + @src->nr_cids). Bits in @dst outside that window 355 * keep their prior values - in particular, cmask_copy() does NOT zero @dst 356 * bits that lie outside @src's range. 357 */ 358 static __always_inline void cmask_and(struct scx_cmask __arena *dst, 359 const struct scx_cmask __arena *src) 360 { 361 cmask_op(dst, src, BPF_CMASK_OP_AND); 362 } 363 364 static __always_inline void cmask_or(struct scx_cmask __arena *dst, 365 const struct scx_cmask __arena *src) 366 { 367 cmask_op(dst, src, BPF_CMASK_OP_OR); 368 } 369 370 static __always_inline void cmask_copy(struct scx_cmask __arena *dst, 371 const struct scx_cmask __arena *src) 372 { 373 cmask_op(dst, src, BPF_CMASK_OP_COPY); 374 } 375 376 static __always_inline void cmask_andnot(struct scx_cmask __arena *dst, 377 const struct scx_cmask __arena *src) 378 { 379 cmask_op(dst, src, BPF_CMASK_OP_ANDNOT); 380 } 381 382 /* 383 * True iff @a and @b have identical bits over their (assumed equal) range. 384 * Callers are expected to pass same-shape cmasks; differing shapes always 385 * compare unequal. 386 */ 387 static __always_inline bool cmask_equal(const struct scx_cmask __arena *a, 388 const struct scx_cmask __arena *b) 389 { 390 u32 nr_words, i; 391 392 if (a->base != b->base || a->nr_cids != b->nr_cids) 393 return false; 394 nr_words = CMASK_NR_WORDS(a->nr_cids); 395 396 bpf_for(i, 0, CMASK_MAX_WORDS) { 397 if (i >= nr_words) 398 break; 399 if (a->bits[i] != b->bits[i]) 400 return false; 401 } 402 return true; 403 } 404 405 /* 406 * True iff every bit set in @a is also set in @b over the intersection of 407 * their ranges. Bits of @a outside @b's range fail the test. 408 */ 409 static __always_inline bool cmask_subset(const struct scx_cmask __arena *a, 410 const struct scx_cmask __arena *b) 411 { 412 u32 a_end = a->base + a->nr_cids; 413 u32 b_end = b->base + b->nr_cids; 414 u32 a_wbase = a->base / 64; 415 u32 b_wbase = b->base / 64; 416 u32 nr_words, i; 417 418 /* any bit of @a outside @b's range is a subset violation */ 419 if (a->base < b->base || a_end > b_end) 420 return false; 421 422 nr_words = CMASK_NR_WORDS(a->nr_cids); 423 bpf_for(i, 0, CMASK_MAX_WORDS) { 424 u32 wi_b; 425 426 if (i >= nr_words) 427 break; 428 wi_b = a_wbase + i - b_wbase; 429 if (a->bits[i] & ~b->bits[wi_b]) 430 return false; 431 } 432 return true; 433 } 434 435 /** 436 * cmask_next_set - find the first set bit at or after @cid 437 * @m: cmask to search 438 * @cid: starting cid (clamped to @m->base if below) 439 * 440 * Returns the smallest set cid in [@cid, @m->base + @m->nr_cids), or 441 * @m->base + @m->nr_cids if none (the out-of-range sentinel matches the 442 * termination condition used by cmask_for_each()). 443 */ 444 static __always_inline u32 cmask_next_set(const struct scx_cmask __arena *m, u32 cid) 445 { 446 u32 end = m->base + m->nr_cids; 447 u32 base = m->base / 64; 448 u32 last_wi = (end - 1) / 64 - base; 449 u32 start_wi, start_bit, i; 450 451 if (cid < m->base) 452 cid = m->base; 453 if (cid >= end) 454 return end; 455 456 start_wi = cid / 64 - base; 457 start_bit = cid & 63; 458 459 bpf_for(i, 0, CMASK_MAX_WORDS) { 460 u32 wi = start_wi + i; 461 u64 word; 462 u32 found; 463 464 if (wi > last_wi) 465 break; 466 467 word = m->bits[wi]; 468 if (i == 0) 469 word &= GENMASK_U64(63, start_bit); 470 if (!word) 471 continue; 472 473 found = (base + wi) * 64 + ctzll(word); 474 if (found >= end) 475 return end; 476 return found; 477 } 478 return end; 479 } 480 481 static __always_inline u32 cmask_first_set(const struct scx_cmask __arena *m) 482 { 483 return cmask_next_set(m, m->base); 484 } 485 486 #define cmask_for_each(cid, m) \ 487 for ((cid) = cmask_first_set(m); \ 488 (cid) < (m)->base + (m)->nr_cids; \ 489 (cid) = cmask_next_set((m), (cid) + 1)) 490 491 /* 492 * Population count over [base, base + nr_cids). Padding bits in the head/tail 493 * words are guaranteed zero by the mutating helpers, so a flat popcount over 494 * all words is correct. 495 */ 496 static __always_inline u32 cmask_weight(const struct scx_cmask __arena *m) 497 { 498 u32 nr_words = CMASK_NR_WORDS(m->nr_cids), i; 499 u32 count = 0; 500 501 bpf_for(i, 0, CMASK_MAX_WORDS) { 502 if (i >= nr_words) 503 break; 504 count += __builtin_popcountll(m->bits[i]); 505 } 506 return count; 507 } 508 509 /* 510 * True if @a and @b share any set bit. Walk only the intersection of their 511 * ranges, matching the semantics of cmask_and(). 512 */ 513 static __always_inline bool cmask_intersects(const struct scx_cmask __arena *a, 514 const struct scx_cmask __arena *b) 515 { 516 u32 a_end = a->base + a->nr_cids; 517 u32 b_end = b->base + b->nr_cids; 518 u32 lo = a->base > b->base ? a->base : b->base; 519 u32 hi = a_end < b_end ? a_end : b_end; 520 u32 a_base = a->base / 64; 521 u32 b_base = b->base / 64; 522 u32 lo_word, hi_word, i; 523 u64 head_mask, tail_mask; 524 525 if (lo >= hi) 526 return false; 527 528 lo_word = lo / 64; 529 hi_word = (hi - 1) / 64; 530 head_mask = GENMASK_U64(63, lo & 63); 531 tail_mask = GENMASK_U64((hi - 1) & 63, 0); 532 533 bpf_for(i, 0, CMASK_MAX_WORDS) { 534 u32 w = lo_word + i; 535 u64 mask, av, bv; 536 537 if (w > hi_word) 538 break; 539 540 mask = GENMASK_U64(63, 0); 541 if (w == lo_word) 542 mask &= head_mask; 543 if (w == hi_word) 544 mask &= tail_mask; 545 546 av = a->bits[w - a_base] & mask; 547 bv = b->bits[w - b_base] & mask; 548 if (av & bv) 549 return true; 550 } 551 return false; 552 } 553 554 /* 555 * Find the next cid set in both @a and @b at or after @start, bounded by the 556 * intersection of the two ranges. Return a->base + a->nr_cids if none found. 557 * 558 * Building block for cmask_next_and_set_wrap(). Callers that want a bounded 559 * scan without wrap call this directly. 560 */ 561 static __always_inline u32 cmask_next_and_set(const struct scx_cmask __arena *a, 562 const struct scx_cmask __arena *b, 563 u32 start) 564 { 565 u32 a_end = a->base + a->nr_cids; 566 u32 b_end = b->base + b->nr_cids; 567 u32 a_wbase = a->base / 64; 568 u32 b_wbase = b->base / 64; 569 u32 lo = a->base > b->base ? a->base : b->base; 570 u32 hi = a_end < b_end ? a_end : b_end; 571 u32 last_wi, start_wi, start_bit, i; 572 573 if (lo >= hi) 574 return a_end; 575 if (start < lo) 576 start = lo; 577 if (start >= hi) 578 return a_end; 579 580 last_wi = (hi - 1) / 64; 581 start_wi = start / 64; 582 start_bit = start & 63; 583 584 bpf_for(i, 0, CMASK_MAX_WORDS) { 585 u32 abs_wi = start_wi + i; 586 u64 word; 587 u32 found; 588 589 if (abs_wi > last_wi) 590 break; 591 592 word = a->bits[abs_wi - a_wbase] & b->bits[abs_wi - b_wbase]; 593 if (i == 0) 594 word &= GENMASK_U64(63, start_bit); 595 if (!word) 596 continue; 597 598 found = abs_wi * 64 + ctzll(word); 599 if (found >= hi) 600 return a_end; 601 return found; 602 } 603 return a_end; 604 } 605 606 /* 607 * Find the next set cid in @m at or after @start, wrapping to @m->base if no 608 * set bit is found in [start, m->base + m->nr_cids). Return m->base + 609 * m->nr_cids if @m is empty. 610 * 611 * Callers do round-robin distribution by passing (last_cid + 1) as @start. 612 */ 613 static __always_inline u32 cmask_next_set_wrap(const struct scx_cmask __arena *m, 614 u32 start) 615 { 616 u32 end = m->base + m->nr_cids; 617 u32 found; 618 619 found = cmask_next_set(m, start); 620 if (found < end || start <= m->base) 621 return found; 622 623 found = cmask_next_set(m, m->base); 624 return found < start ? found : end; 625 } 626 627 /* 628 * Find the next cid set in both @a and @b at or after @start, wrapping to 629 * @a->base if none found in the forward half. Return a->base + a->nr_cids 630 * if the intersection is empty. 631 * 632 * Callers do round-robin distribution by passing (last_cid + 1) as @start. 633 */ 634 static __always_inline u32 cmask_next_and_set_wrap(const struct scx_cmask __arena *a, 635 const struct scx_cmask __arena *b, 636 u32 start) 637 { 638 u32 a_end = a->base + a->nr_cids; 639 u32 found; 640 641 found = cmask_next_and_set(a, b, start); 642 if (found < a_end || start <= a->base) 643 return found; 644 645 found = cmask_next_and_set(a, b, a->base); 646 return found < start ? found : a_end; 647 } 648 649 /** 650 * cmask_from_cpumask - translate a kernel cpumask to a cid-space cmask 651 * @m: cmask to fill. Zeroed first; only bits within [@m->base, @m->base + 652 * @m->nr_cids) are updated - cpus mapping to cids outside that range 653 * are ignored. 654 * @cpumask: kernel cpumask to translate 655 * 656 * For each cpu in @cpumask, set the cpu's cid in @m. Caller must ensure 657 * @cpumask stays stable across the call (e.g. RCU read lock for 658 * task->cpus_ptr). 659 */ 660 static __always_inline void cmask_from_cpumask(struct scx_cmask __arena *m, 661 const struct cpumask *cpumask) 662 { 663 u32 nr_cpu_ids = scx_bpf_nr_cpu_ids(); 664 s32 cpu; 665 666 cmask_zero(m); 667 bpf_for(cpu, 0, nr_cpu_ids) { 668 s32 cid; 669 670 if (!bpf_cpumask_test_cpu(cpu, cpumask)) 671 continue; 672 cid = scx_bpf_cpu_to_cid(cpu); 673 if (cid >= 0) 674 __cmask_set(cid, m); 675 } 676 } 677 678 #endif /* __SCX_CID_BPF_H */ 679