101d541baSAndrea Righi // SPDX-License-Identifier: GPL-2.0 201d541baSAndrea Righi /* 301d541baSAndrea Righi * A scheduler that validates the behavior of scx_bpf_select_cpu_and() by 401d541baSAndrea Righi * selecting idle CPUs strictly within a subset of allowed CPUs. 501d541baSAndrea Righi * 601d541baSAndrea Righi * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com> 701d541baSAndrea Righi */ 801d541baSAndrea Righi 901d541baSAndrea Righi #include <scx/common.bpf.h> 1001d541baSAndrea Righi 1101d541baSAndrea Righi char _license[] SEC("license") = "GPL"; 1201d541baSAndrea Righi 1301d541baSAndrea Righi UEI_DEFINE(uei); 1401d541baSAndrea Righi 1501d541baSAndrea Righi private(PREF_CPUS) struct bpf_cpumask __kptr * allowed_cpumask; 1601d541baSAndrea Righi 1701d541baSAndrea Righi static void 1801d541baSAndrea Righi validate_idle_cpu(const struct task_struct *p, const struct cpumask *allowed, s32 cpu) 1901d541baSAndrea Righi { 2001d541baSAndrea Righi if (scx_bpf_test_and_clear_cpu_idle(cpu)) 2101d541baSAndrea Righi scx_bpf_error("CPU %d should be marked as busy", cpu); 2201d541baSAndrea Righi 2301d541baSAndrea Righi if (bpf_cpumask_subset(allowed, p->cpus_ptr) && 2401d541baSAndrea Righi !bpf_cpumask_test_cpu(cpu, allowed)) 2501d541baSAndrea Righi scx_bpf_error("CPU %d not in the allowed domain for %d (%s)", 2601d541baSAndrea Righi cpu, p->pid, p->comm); 2701d541baSAndrea Righi } 2801d541baSAndrea Righi 2901d541baSAndrea Righi s32 BPF_STRUCT_OPS(allowed_cpus_select_cpu, 3001d541baSAndrea Righi struct task_struct *p, s32 prev_cpu, u64 wake_flags) 3101d541baSAndrea Righi { 3201d541baSAndrea Righi const struct cpumask *allowed; 3301d541baSAndrea Righi s32 cpu; 3401d541baSAndrea Righi 3501d541baSAndrea Righi allowed = cast_mask(allowed_cpumask); 3601d541baSAndrea Righi if (!allowed) { 3701d541baSAndrea Righi scx_bpf_error("allowed domain not initialized"); 3801d541baSAndrea Righi return -EINVAL; 3901d541baSAndrea Righi } 4001d541baSAndrea Righi 4101d541baSAndrea Righi /* 4201d541baSAndrea Righi * Select an idle CPU strictly within the allowed domain. 4301d541baSAndrea Righi */ 4401d541baSAndrea Righi cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, allowed, 0); 4501d541baSAndrea Righi if (cpu >= 0) { 4601d541baSAndrea Righi validate_idle_cpu(p, allowed, cpu); 4701d541baSAndrea Righi scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 4801d541baSAndrea Righi 4901d541baSAndrea Righi return cpu; 5001d541baSAndrea Righi } 5101d541baSAndrea Righi 5201d541baSAndrea Righi return prev_cpu; 5301d541baSAndrea Righi } 5401d541baSAndrea Righi 5501d541baSAndrea Righi void BPF_STRUCT_OPS(allowed_cpus_enqueue, struct task_struct *p, u64 enq_flags) 5601d541baSAndrea Righi { 5701d541baSAndrea Righi const struct cpumask *allowed; 5801d541baSAndrea Righi s32 prev_cpu = scx_bpf_task_cpu(p), cpu; 5901d541baSAndrea Righi 6001d541baSAndrea Righi scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); 6101d541baSAndrea Righi 6201d541baSAndrea Righi allowed = cast_mask(allowed_cpumask); 6301d541baSAndrea Righi if (!allowed) { 6401d541baSAndrea Righi scx_bpf_error("allowed domain not initialized"); 6501d541baSAndrea Righi return; 6601d541baSAndrea Righi } 6701d541baSAndrea Righi 6801d541baSAndrea Righi /* 6901d541baSAndrea Righi * Use scx_bpf_select_cpu_and() to proactively kick an idle CPU 7001d541baSAndrea Righi * within @allowed_cpumask, usable by @p. 7101d541baSAndrea Righi */ 7201d541baSAndrea Righi cpu = scx_bpf_select_cpu_and(p, prev_cpu, 0, allowed, 0); 7301d541baSAndrea Righi if (cpu >= 0) { 7401d541baSAndrea Righi validate_idle_cpu(p, allowed, cpu); 7501d541baSAndrea Righi scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); 7601d541baSAndrea Righi } 7701d541baSAndrea Righi } 7801d541baSAndrea Righi 7901d541baSAndrea Righi s32 BPF_STRUCT_OPS_SLEEPABLE(allowed_cpus_init) 8001d541baSAndrea Righi { 8101d541baSAndrea Righi struct bpf_cpumask *mask; 8201d541baSAndrea Righi 8301d541baSAndrea Righi mask = bpf_cpumask_create(); 8401d541baSAndrea Righi if (!mask) 8501d541baSAndrea Righi return -ENOMEM; 8601d541baSAndrea Righi 8701d541baSAndrea Righi mask = bpf_kptr_xchg(&allowed_cpumask, mask); 8801d541baSAndrea Righi if (mask) 8901d541baSAndrea Righi bpf_cpumask_release(mask); 9001d541baSAndrea Righi 9101d541baSAndrea Righi bpf_rcu_read_lock(); 9201d541baSAndrea Righi 9301d541baSAndrea Righi /* 9401d541baSAndrea Righi * Assign the first online CPU to the allowed domain. 9501d541baSAndrea Righi */ 9601d541baSAndrea Righi mask = allowed_cpumask; 9701d541baSAndrea Righi if (mask) { 9801d541baSAndrea Righi const struct cpumask *online = scx_bpf_get_online_cpumask(); 9901d541baSAndrea Righi 10001d541baSAndrea Righi bpf_cpumask_set_cpu(bpf_cpumask_first(online), mask); 10101d541baSAndrea Righi scx_bpf_put_cpumask(online); 10201d541baSAndrea Righi } 10301d541baSAndrea Righi 10401d541baSAndrea Righi bpf_rcu_read_unlock(); 10501d541baSAndrea Righi 10601d541baSAndrea Righi return 0; 10701d541baSAndrea Righi } 10801d541baSAndrea Righi 10901d541baSAndrea Righi void BPF_STRUCT_OPS(allowed_cpus_exit, struct scx_exit_info *ei) 11001d541baSAndrea Righi { 11101d541baSAndrea Righi UEI_RECORD(uei, ei); 11201d541baSAndrea Righi } 11301d541baSAndrea Righi 114*e764295aSAndrea Righi struct task_cpu_arg { 115*e764295aSAndrea Righi pid_t pid; 116*e764295aSAndrea Righi }; 117*e764295aSAndrea Righi 118*e764295aSAndrea Righi SEC("syscall") 119*e764295aSAndrea Righi int select_cpu_from_user(struct task_cpu_arg *input) 120*e764295aSAndrea Righi { 121*e764295aSAndrea Righi struct task_struct *p; 122*e764295aSAndrea Righi int cpu; 123*e764295aSAndrea Righi 124*e764295aSAndrea Righi p = bpf_task_from_pid(input->pid); 125*e764295aSAndrea Righi if (!p) 126*e764295aSAndrea Righi return -EINVAL; 127*e764295aSAndrea Righi 128*e764295aSAndrea Righi bpf_rcu_read_lock(); 129*e764295aSAndrea Righi cpu = scx_bpf_select_cpu_and(p, bpf_get_smp_processor_id(), 0, p->cpus_ptr, 0); 130*e764295aSAndrea Righi bpf_rcu_read_unlock(); 131*e764295aSAndrea Righi 132*e764295aSAndrea Righi bpf_task_release(p); 133*e764295aSAndrea Righi 134*e764295aSAndrea Righi return cpu; 135*e764295aSAndrea Righi } 136*e764295aSAndrea Righi 13701d541baSAndrea Righi SEC(".struct_ops.link") 13801d541baSAndrea Righi struct sched_ext_ops allowed_cpus_ops = { 13901d541baSAndrea Righi .select_cpu = (void *)allowed_cpus_select_cpu, 14001d541baSAndrea Righi .enqueue = (void *)allowed_cpus_enqueue, 14101d541baSAndrea Righi .init = (void *)allowed_cpus_init, 14201d541baSAndrea Righi .exit = (void *)allowed_cpus_exit, 14301d541baSAndrea Righi .name = "allowed_cpus", 14401d541baSAndrea Righi }; 145