// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migration can only
 * occur between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

/* Return true if @cpu is marked idle in @node's per-node idle cpumask. */
static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);

	/* A picked CPU must have been claimed (cleared from the idle mask). */
	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	/* The picked CPU must belong to the task's current node. */
	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	/* Queue the task on the DSQ associated with its current node. */
	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	/* Consume tasks only from the DSQ of @cpu's own node. */
	scx_bpf_dsq_move_to_local(node);
}

s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	/* Create one DSQ per NUMA node, using the node id as the DSQ id. */
	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu	= (void *)numa_select_cpu,
	.enqueue	= (void *)numa_enqueue,
	.dispatch	= (void *)numa_dispatch,
	.init		= (void *)numa_init,
	.exit		= (void *)numa_exit,
	.name		= "numa",
};