// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migration can only
 * occur between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

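/*
 * Return true if @cpu is currently marked as idle in the idle cpumask of
 * NUMA node @node, false otherwise.
 */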
static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

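/*
 * Pick a target CPU for a task that is waking up.
 *
 * CPUs are picked only within the task's current NUMA node, and the
 * selected CPU is verified to be marked as busy and to belong to that
 * node.
 */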
s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
						__COMPAT_SCX_PICK_IDLE_IN_NODE);

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

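/*
 * Queue the task to the DSQ associated with its current NUMA node.
 */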
void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

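/*
 * Consume tasks from the DSQ associated with the node of @cpu, moving
 * them to the CPU's local DSQ.
 */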
void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	scx_bpf_dsq_move_to_local(node);
}

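/*
 * Create a separate DSQ for each NUMA node, using the node ID as the
 * DSQ ID.
 */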
s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

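/*
 * Record the exit information when the scheduler is unloaded.
 */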
void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu		= (void *)numa_select_cpu,
	.enqueue		= (void *)numa_enqueue,
	.dispatch		= (void *)numa_dispatch,
	.init			= (void *)numa_init,
	.exit			= (void *)numa_exit,
	.name			= "numa",
};