// SPDX-License-Identifier: GPL-2.0
/*
 * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
 *
 * Built-in idle CPU tracking policy.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 * Copyright (c) 2024 Andrea Righi <arighi@nvidia.com>
 */
#include "ext_idle.h"

/* Enable/disable built-in idle CPU selection policy */
DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);

#ifdef CONFIG_SMP
#ifdef CONFIG_CPUMASK_OFFSTACK
#define CL_ALIGNED_IF_ONSTACK
#else
#define CL_ALIGNED_IF_ONSTACK __cacheline_aligned_in_smp
#endif

/* Enable/disable LLC aware optimizations */
DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_llc);

/* Enable/disable NUMA aware optimizations */
DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_numa);

static struct {
	cpumask_var_t cpu;
	cpumask_var_t smt;
} idle_masks CL_ALIGNED_IF_ONSTACK;

bool scx_idle_test_and_clear_cpu(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	/*
	 * SMT mask should be cleared whether we can claim @cpu or not. The SMT
	 * cluster is not wholly idle either way. This also prevents
	 * scx_pick_idle_cpu() from getting caught in an infinite loop.
	 */
	if (sched_smt_active()) {
		const struct cpumask *smt = cpu_smt_mask(cpu);

		/*
		 * If offline, @cpu is not its own sibling and
		 * scx_pick_idle_cpu() can get caught in an infinite loop as
		 * @cpu is never cleared from idle_masks.smt. Ensure that @cpu
		 * is eventually cleared.
		 *
		 * NOTE: Use cpumask_intersects() and cpumask_test_cpu() to
		 * reduce memory writes, which may help alleviate cache
		 * coherence pressure.
		 */
		if (cpumask_intersects(smt, idle_masks.smt))
			cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
		else if (cpumask_test_cpu(cpu, idle_masks.smt))
			__cpumask_clear_cpu(cpu, idle_masks.smt);
	}
#endif
	return cpumask_test_and_clear_cpu(cpu, idle_masks.cpu);
}

s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags)
{
	int cpu;

retry:
	if (sched_smt_active()) {
		cpu = cpumask_any_and_distribute(idle_masks.smt, cpus_allowed);
		if (cpu < nr_cpu_ids)
			goto found;

		if (flags & SCX_PICK_IDLE_CORE)
			return -EBUSY;
	}

	cpu = cpumask_any_and_distribute(idle_masks.cpu, cpus_allowed);
	if (cpu >= nr_cpu_ids)
		return -EBUSY;

found:
	if (scx_idle_test_and_clear_cpu(cpu))
		return cpu;
	else
		goto retry;
}

/*
 * Return the number of CPUs in the same LLC domain as @cpu (or zero if the LLC
 * domain is not defined).
 */
static unsigned int llc_weight(s32 cpu)
{
	struct sched_domain *sd;

	sd = rcu_dereference(per_cpu(sd_llc, cpu));
	if (!sd)
		return 0;

	return sd->span_weight;
}

/*
 * Return the cpumask representing the LLC domain of @cpu (or NULL if the LLC
 * domain is not defined).
 */
static struct cpumask *llc_span(s32 cpu)
{
	struct sched_domain *sd;

	sd = rcu_dereference(per_cpu(sd_llc, cpu));
	if (!sd)
		return NULL;

	return sched_domain_span(sd);
}

/*
 * Return the number of CPUs in the same NUMA domain as @cpu (or zero if the
 * NUMA domain is not defined).
 */
static unsigned int numa_weight(s32 cpu)
{
	struct sched_domain *sd;
	struct sched_group *sg;

	sd = rcu_dereference(per_cpu(sd_numa, cpu));
	if (!sd)
		return 0;
	sg = sd->groups;
	if (!sg)
		return 0;

	return sg->group_weight;
}

/*
 * Return the cpumask representing the NUMA domain of @cpu (or NULL if the NUMA
 * domain is not defined).
 */
static struct cpumask *numa_span(s32 cpu)
{
	struct sched_domain *sd;
	struct sched_group *sg;

	sd = rcu_dereference(per_cpu(sd_numa, cpu));
	if (!sd)
		return NULL;
	sg = sd->groups;
	if (!sg)
		return NULL;

	return sched_group_span(sg);
}

/*
 * Return true if the LLC domains do not perfectly overlap with the NUMA
 * domains, false otherwise.
 */
static bool llc_numa_mismatch(void)
{
	int cpu;

	/*
	 * We need to scan all online CPUs to verify whether their scheduling
	 * domains overlap.
	 *
	 * While it is rare to encounter architectures with asymmetric NUMA
	 * topologies, CPU hotplugging or virtualized environments can result
	 * in asymmetric configurations.
	 *
	 * For example:
	 *
	 *  NUMA 0:
	 *    - LLC 0: cpu0..cpu7
	 *    - LLC 1: cpu8..cpu15 [offline]
	 *
	 *  NUMA 1:
	 *    - LLC 0: cpu16..cpu23
	 *    - LLC 1: cpu24..cpu31
	 *
	 * In this case, if we only checked the first online CPU (cpu0), we
	 * would incorrectly conclude that the LLC and NUMA domains fully
	 * overlap, even though NUMA 1 has two distinct LLC domains.
	 */
	for_each_online_cpu(cpu)
		if (llc_weight(cpu) != numa_weight(cpu))
			return true;

	return false;
}

/*
 * Initialize topology-aware scheduling.
 *
 * Detect if the system has multiple LLC or multiple NUMA domains and enable
 * cache-aware / NUMA-aware scheduling optimizations in the default CPU idle
 * selection policy.
 *
 * Assumption: the kernel's internal topology representation assumes that each
 * CPU belongs to a single LLC domain, and that each LLC domain is entirely
 * contained within a single NUMA node.
 */
void scx_idle_update_selcpu_topology(void)
{
	bool enable_llc = false, enable_numa = false;
	unsigned int nr_cpus;
	s32 cpu = cpumask_first(cpu_online_mask);

	/*
	 * Enable LLC domain optimization only when there are multiple LLC
	 * domains among the online CPUs. If all online CPUs are part of a
	 * single LLC domain, the idle CPU selection logic can choose any
	 * online CPU without bias.
	 *
	 * Note that it is sufficient to check the LLC domain of the first
	 * online CPU to determine whether a single LLC domain includes all
	 * CPUs.
	 */
	rcu_read_lock();
	nr_cpus = llc_weight(cpu);
	if (nr_cpus > 0) {
		if (nr_cpus < num_online_cpus())
			enable_llc = true;
		pr_debug("sched_ext: LLC=%*pb weight=%u\n",
			 cpumask_pr_args(llc_span(cpu)), llc_weight(cpu));
	}

	/*
	 * Enable NUMA optimization only when there are multiple NUMA domains
	 * among the online CPUs and the NUMA domains don't perfectly overlap
	 * with the LLC domains.
	 *
	 * If all CPUs belong to the same NUMA node and the same LLC domain,
	 * enabling both NUMA and LLC optimizations is unnecessary, as checking
	 * for an idle CPU in the same domain twice is redundant.
	 */
	nr_cpus = numa_weight(cpu);
	if (nr_cpus > 0) {
		if (nr_cpus < num_online_cpus() && llc_numa_mismatch())
			enable_numa = true;
		pr_debug("sched_ext: NUMA=%*pb weight=%u\n",
			 cpumask_pr_args(numa_span(cpu)), numa_weight(cpu));
	}
	rcu_read_unlock();

	pr_debug("sched_ext: LLC idle selection %s\n",
		 str_enabled_disabled(enable_llc));
	pr_debug("sched_ext: NUMA idle selection %s\n",
		 str_enabled_disabled(enable_numa));

	if (enable_llc)
		static_branch_enable_cpuslocked(&scx_selcpu_topo_llc);
	else
		static_branch_disable_cpuslocked(&scx_selcpu_topo_llc);
	if (enable_numa)
		static_branch_enable_cpuslocked(&scx_selcpu_topo_numa);
	else
		static_branch_disable_cpuslocked(&scx_selcpu_topo_numa);
}

/*
 * Built-in CPU idle selection policy:
 *
 * 1. Prioritize full-idle cores:
 *   - always prioritize CPUs from fully idle cores (both logical CPUs are
 *     idle) to avoid interference caused by SMT.
 *
 * 2. Reuse the same CPU:
 *   - prefer the last used CPU to take advantage of cached data (L1, L2) and
 *     branch prediction optimizations.
 *
 * 3. Pick a CPU within the same LLC (Last-Level Cache):
 *   - if the above conditions aren't met, pick a CPU that shares the same LLC
 *     to maintain cache locality.
 *
 * 4. Pick a CPU within the same NUMA node, if enabled:
 *   - choose a CPU from the same NUMA node to reduce memory access latency.
 *
 * 5. Pick any idle CPU usable by the task.
 *
 * Steps 3 and 4 are performed only if the system has, respectively, multiple
 * LLC domains / multiple NUMA nodes (see scx_selcpu_topo_llc and
 * scx_selcpu_topo_numa).
 *
 * NOTE: tasks that can only run on 1 CPU are excluded from this logic, because
 * we never call ops.select_cpu() for them, see select_task_rq().
 */
s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *found)
{
	const struct cpumask *llc_cpus = NULL;
	const struct cpumask *numa_cpus = NULL;
	s32 cpu;

	*found = false;

	/*
	 * This is necessary to protect llc_cpus and numa_cpus.
	 */
	rcu_read_lock();

	/*
	 * Determine the scheduling domain only if the task is allowed to run
	 * on all CPUs.
	 *
	 * This is done primarily for efficiency, as it avoids the overhead of
	 * updating a cpumask every time we need to select an idle CPU (which
	 * can be costly in large SMP systems), but it also aligns logically:
	 * if a task's scheduling domain is restricted by user-space (through
	 * CPU affinity), the task will simply use the flat scheduling domain
	 * defined by user-space.
	 */
	if (p->nr_cpus_allowed >= num_possible_cpus()) {
		if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
			numa_cpus = numa_span(prev_cpu);

		if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
			llc_cpus = llc_span(prev_cpu);
	}

	/*
	 * If WAKE_SYNC, try to migrate the wakee to the waker's CPU.
	 */
	if (wake_flags & SCX_WAKE_SYNC) {
		cpu = smp_processor_id();

		/*
		 * If the waker's CPU is cache affine and prev_cpu is idle,
		 * then avoid a migration.
		 */
		if (cpus_share_cache(cpu, prev_cpu) &&
		    scx_idle_test_and_clear_cpu(prev_cpu)) {
			cpu = prev_cpu;
			goto cpu_found;
		}

		/*
		 * If the waker's local DSQ is empty, and the system is
		 * under-utilized, try to wake up @p on the local DSQ of the
		 * waker.
		 *
		 * Checking only for an empty local DSQ is insufficient as it
		 * could give the wakee an unfair advantage when the system is
		 * oversaturated.
		 *
		 * Checking only for the presence of idle CPUs is also
		 * insufficient as the local DSQ of the waker could have tasks
		 * piled up on it even if there is an idle core elsewhere on
		 * the system.
		 */
		if (!cpumask_empty(idle_masks.cpu) &&
		    !(current->flags & PF_EXITING) &&
		    cpu_rq(cpu)->scx.local_dsq.nr == 0) {
			if (cpumask_test_cpu(cpu, p->cpus_ptr))
				goto cpu_found;
		}
	}

	/*
	 * If CPU has SMT, any wholly idle CPU is likely a better pick than
	 * partially idle @prev_cpu.
	 */
	if (sched_smt_active()) {
		/*
		 * Keep using @prev_cpu if it's part of a fully idle core.
		 */
		if (cpumask_test_cpu(prev_cpu, idle_masks.smt) &&
		    scx_idle_test_and_clear_cpu(prev_cpu)) {
			cpu = prev_cpu;
			goto cpu_found;
		}

		/*
		 * Search for any fully idle core in the same LLC domain.
		 */
		if (llc_cpus) {
			cpu = scx_pick_idle_cpu(llc_cpus, SCX_PICK_IDLE_CORE);
			if (cpu >= 0)
				goto cpu_found;
		}

		/*
		 * Search for any fully idle core in the same NUMA node.
		 */
		if (numa_cpus) {
			cpu = scx_pick_idle_cpu(numa_cpus, SCX_PICK_IDLE_CORE);
			if (cpu >= 0)
				goto cpu_found;
		}

		/*
		 * Search for any fully idle core usable by the task.
		 */
		cpu = scx_pick_idle_cpu(p->cpus_ptr, SCX_PICK_IDLE_CORE);
		if (cpu >= 0)
			goto cpu_found;
	}

	/*
	 * Use @prev_cpu if it's idle.
	 */
	if (scx_idle_test_and_clear_cpu(prev_cpu)) {
		cpu = prev_cpu;
		goto cpu_found;
	}

	/*
	 * Search for any idle CPU in the same LLC domain.
	 */
	if (llc_cpus) {
		cpu = scx_pick_idle_cpu(llc_cpus, 0);
		if (cpu >= 0)
			goto cpu_found;
	}

	/*
	 * Search for any idle CPU in the same NUMA node.
	 */
	if (numa_cpus) {
		cpu = scx_pick_idle_cpu(numa_cpus, 0);
		if (cpu >= 0)
			goto cpu_found;
	}

	/*
	 * Search for any idle CPU usable by the task.
	 */
	cpu = scx_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0)
		goto cpu_found;

	rcu_read_unlock();
	return prev_cpu;

cpu_found:
	rcu_read_unlock();

	*found = true;
	return cpu;
}

void scx_idle_reset_masks(void)
{
	/*
	 * Consider all online cpus idle. Should converge to the actual state
	 * quickly.
	 */
	cpumask_copy(idle_masks.cpu, cpu_online_mask);
	cpumask_copy(idle_masks.smt, cpu_online_mask);
}

void scx_idle_init_masks(void)
{
	BUG_ON(!alloc_cpumask_var(&idle_masks.cpu, GFP_KERNEL));
	BUG_ON(!alloc_cpumask_var(&idle_masks.smt, GFP_KERNEL));
}

static void update_builtin_idle(int cpu, bool idle)
{
	assign_cpu(cpu, idle_masks.cpu, idle);

#ifdef CONFIG_SCHED_SMT
	if (sched_smt_active()) {
		const struct cpumask *smt = cpu_smt_mask(cpu);

		if (idle) {
			/*
			 * idle_masks.smt handling is racy but that's fine as
			 * it's only for optimization and self-correcting.
			 */
			if (!cpumask_subset(smt, idle_masks.cpu))
				return;
			cpumask_or(idle_masks.smt, idle_masks.smt, smt);
		} else {
			cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
		}
	}
#endif
}

/*
 * Update the idle state of a CPU to @idle.
 *
 * If @do_notify is true, ops.update_idle() is invoked to notify the scx
 * scheduler of an actual idle state transition (idle to busy or vice
 * versa). If @do_notify is false, only the idle state in the idle masks is
 * refreshed without invoking ops.update_idle().
 *
 * This distinction is necessary, because an idle CPU can be "reserved" and
 * awakened via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu(), marking it as
 * busy even if no tasks are dispatched. In this case, the CPU may return
 * to idle without a true state transition.
 * Refreshing the idle masks without invoking ops.update_idle() ensures
 * accurate idle state tracking while avoiding unnecessary updates and
 * maintaining balanced state transitions.
 */
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
	int cpu = cpu_of(rq);

	lockdep_assert_rq_held(rq);

	/*
	 * Trigger ops.update_idle() only when transitioning from a task to
	 * the idle thread and vice versa.
	 *
	 * Idle transitions are indicated by do_notify being set to true,
	 * managed by put_prev_task_idle()/set_next_task_idle().
	 */
	if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);

	/*
	 * Update the idle masks:
	 * - for real idle transitions (do_notify == true)
	 * - for idle-to-idle transitions (indicated by the previous task
	 *   being the idle thread, managed by pick_task_idle())
	 *
	 * Skip updating idle masks if the previous task is not the idle
	 * thread, since set_next_task_idle() has already handled it when
	 * transitioning from a task to the idle thread (calling this
	 * function with do_notify == true).
	 *
	 * This way we avoid updating the idle masks twice unnecessarily.
	 */
	if (static_branch_likely(&scx_builtin_idle_enabled))
		if (do_notify || is_idle_task(rq->curr))
			update_builtin_idle(cpu, idle);
}
#endif	/* CONFIG_SMP */

/********************************************************************************
 * Helpers that can be called from the BPF scheduler.
 */
__bpf_kfunc_start_defs();

static bool check_builtin_idle_enabled(void)
{
	if (static_branch_likely(&scx_builtin_idle_enabled))
		return true;

	scx_ops_error("built-in idle tracking is disabled");
	return false;
}

/**
 * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu()
 * @p: task_struct to select a CPU for
 * @prev_cpu: CPU @p was on previously
 * @wake_flags: %SCX_WAKE_* flags
 * @is_idle: out parameter indicating whether the returned CPU is idle
 *
 * Can only be called from ops.select_cpu() if the built-in CPU selection is
 * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set.
 * @p, @prev_cpu and @wake_flags match ops.select_cpu().
 *
 * Returns the picked CPU with *@is_idle indicating whether the picked CPU is
 * currently idle and thus a good candidate for direct dispatching.
 */
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
				       u64 wake_flags, bool *is_idle)
{
	if (!check_builtin_idle_enabled())
		goto prev_cpu;

	if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
		goto prev_cpu;

#ifdef CONFIG_SMP
	return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
#endif

prev_cpu:
	*is_idle = false;
	return prev_cpu;
}
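
/*
 * Example: how a BPF scheduler would typically wire this kfunc into its
 * ops.select_cpu() callback, dispatching directly when an idle CPU was
 * found. Illustrative sketch only; BPF_STRUCT_OPS(), scx_bpf_dsq_insert(),
 * SCX_DSQ_LOCAL and SCX_SLICE_DFL are assumed from the sched_ext BPF-side
 * tooling and may differ across versions:
 *
 *	s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
 *			   s32 prev_cpu, u64 wake_flags)
 *	{
 *		bool is_idle = false;
 *		s32 cpu;
 *
 *		cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
 *		if (is_idle)
 *			scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
 *
 *		return cpu;
 *	}
 */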

/**
 * scx_bpf_get_idle_cpumask - Get a referenced kptr to the idle-tracking
 * per-CPU cpumask.
 *
 * Returns an empty cpumask if idle tracking is not enabled, or running on a
 * UP kernel.
 */
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
{
	if (!check_builtin_idle_enabled())
		return cpu_none_mask;

#ifdef CONFIG_SMP
	return idle_masks.cpu;
#else
	return cpu_none_mask;
#endif
}

/**
 * scx_bpf_get_idle_smtmask - Get a referenced kptr to the idle-tracking,
 * per-physical-core cpumask. Can be used to determine if an entire physical
 * core is free.
 *
 * Returns an empty cpumask if idle tracking is not enabled, or running on a
 * UP kernel.
 */
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
{
	if (!check_builtin_idle_enabled())
		return cpu_none_mask;

#ifdef CONFIG_SMP
	if (sched_smt_active())
		return idle_masks.smt;
	else
		return idle_masks.cpu;
#else
	return cpu_none_mask;
#endif
}

/**
 * scx_bpf_put_idle_cpumask - Release a previously acquired referenced kptr to
 * either the percpu, or SMT idle-tracking cpumask.
 * @idle_mask: &cpumask to use
 */
__bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
{
	/*
	 * Empty function body because we aren't actually acquiring or releasing
	 * a reference to a global idle cpumask, which is read-only in the
	 * caller and is never released. The acquire / release semantics here
	 * are just used to make the cpumask a trusted pointer in the caller.
	 */
}
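
/*
 * Example: the acquire/use/release pattern expected from BPF schedulers
 * when inspecting the idle-tracking cpumasks. Illustrative sketch only;
 * bpf_cpumask_test_cpu() is assumed from the generic BPF cpumask kfuncs:
 *
 *	const struct cpumask *idle;
 *	bool smt_idle;
 *
 *	idle = scx_bpf_get_idle_smtmask();
 *	smt_idle = bpf_cpumask_test_cpu(cpu, idle);
 *	scx_bpf_put_idle_cpumask(idle);
 */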

/**
 * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state
 * @cpu: cpu to test and clear idle for
 *
 * Returns %true if @cpu was idle and its idle state was successfully cleared.
 * %false otherwise.
 *
 * Unavailable if ops.update_idle() is implemented and
 * %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
 */
__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
{
	if (!check_builtin_idle_enabled())
		return false;

	if (ops_cpu_valid(cpu, NULL))
		return scx_idle_test_and_clear_cpu(cpu);
	else
		return false;
}

/**
 * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu
 * @cpus_allowed: Allowed cpumask
 * @flags: %SCX_PICK_IDLE_CPU_* flags
 *
 * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu
 * number on success. -%EBUSY if no matching cpu was found.
 *
 * Idle CPU tracking may race against CPU scheduling state transitions. For
 * example, this function may return -%EBUSY as CPUs are transitioning into the
 * idle state. If the caller then assumes that there will be dispatch events on
 * the CPUs as they were all busy, the scheduler may end up stalling with CPUs
 * idling while there are pending tasks. Use scx_bpf_pick_any_cpu() and
 * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch
 * event in the near future.
 *
 * Unavailable if ops.update_idle() is implemented and
 * %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
 */
__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
				      u64 flags)
{
	if (!check_builtin_idle_enabled())
		return -EBUSY;

	return scx_pick_idle_cpu(cpus_allowed, flags);
}
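
/*
 * Example: the fallback pattern suggested in the comment above. Claim an
 * idle CPU if one can be found; otherwise pick any allowed CPU and kick it
 * so that at least one dispatch event is guaranteed. Illustrative sketch
 * only; scx_bpf_kick_cpu() is assumed to be callable from the same BPF
 * context:
 *
 *	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
 *	if (cpu < 0) {
 *		cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
 *		if (cpu >= 0)
 *			scx_bpf_kick_cpu(cpu, 0);
 *	}
 */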

/**
 * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU
 * @cpus_allowed: Allowed cpumask
 * @flags: %SCX_PICK_IDLE_CPU_* flags
 *
 * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any
 * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked CPU
 * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is
 * empty.
 *
 * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not
 * set, this function can't tell which CPUs are idle and will always pick any
 * CPU.
 */
__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
				     u64 flags)
{
	s32 cpu;

	if (static_branch_likely(&scx_builtin_idle_enabled)) {
		cpu = scx_pick_idle_cpu(cpus_allowed, flags);
		if (cpu >= 0)
			return cpu;
	}

	cpu = cpumask_any_distribute(cpus_allowed);
	if (cpu < nr_cpu_ids)
		return cpu;
	else
		return -EBUSY;
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(scx_kfunc_ids_idle)
BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_idle)

static const struct btf_kfunc_id_set scx_kfunc_set_idle = {
	.owner			= THIS_MODULE,
	.set			= &scx_kfunc_ids_idle,
};

BTF_KFUNCS_START(scx_kfunc_ids_select_cpu)
BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_select_cpu)

static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = {
	.owner			= THIS_MODULE,
	.set			= &scx_kfunc_ids_select_cpu,
};

int scx_idle_init(void)
{
	int ret;

	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_select_cpu) ||
	      register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
	      register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_idle) ||
	      register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_idle);

	return ret;
}