/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Test SCX_KICK_WAIT forward progress under cyclic wait pressure.
 *
 * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path.
 * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits
 * for A), all CPUs deadlock unless the implementation breaks the cycle.
 *
 * This test creates that scenario: three CPUs are arranged in a ring. The BPF
 * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT
 * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling
 * sched_yield(), generating a steady stream of enqueues and thus sustained
 * A->B->C->A kick_wait cycle pressure. The test passes if the system remains
 * responsive for 5 seconds without the scheduler being killed by the watchdog.
 */
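/*
 * For reference, the BPF side (cyclic_kick_wait.bpf.c, not shown here) is
 * expected to do roughly the following from ops.enqueue() -- a sketch under
 * the assumptions described above, not the actual implementation.
 * next_ring_cpu() stands in for whatever lookup maps a CPU to its successor
 * in the ring:
 *
 *	s32 target = next_ring_cpu(scx_bpf_task_cpu(p));
 *
 *	scx_bpf_kick_cpu(target, SCX_KICK_WAIT);
 *	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
 */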
#define _GNU_SOURCE

#include <bpf/bpf.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <scx/common.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "scx_test.h"
#include "cyclic_kick_wait.bpf.skel.h"

#define WORKERS_PER_CPU	4
#define NR_TEST_CPUS	3
#define NR_WORKERS	(NR_TEST_CPUS * WORKERS_PER_CPU)

struct worker_ctx {
	pthread_t tid;
	int cpu;
	volatile bool stop;
	volatile __u64 iters;
	bool started;
};

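/*
 * Worker thread body: pin itself to its assigned test CPU, then spin on
 * sched_yield() to generate a continuous stream of enqueues on that CPU.
 */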
static void *worker_fn(void *arg)
{
	struct worker_ctx *worker = arg;
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(worker->cpu, &mask);

	if (sched_setaffinity(0, sizeof(mask), &mask))
		return (void *)(uintptr_t)errno;

	while (!worker->stop) {
		sched_yield();
		worker->iters++;
	}

	return NULL;
}

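/*
 * Join a worker with a bounded wait.  Should forward progress have been lost,
 * a worker might never run again; detach it on join timeout so the test fails
 * instead of hanging indefinitely.
 */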
static int join_worker(struct worker_ctx *worker)
{
	void *ret;
	struct timespec ts;
	int err;

	if (!worker->started)
		return 0;

	if (clock_gettime(CLOCK_REALTIME, &ts))
		return -errno;

	ts.tv_sec += 2;
	err = pthread_timedjoin_np(worker->tid, &ret, &ts);
	if (err == ETIMEDOUT)
		pthread_detach(worker->tid);
	if (err)
		return -err;

	if ((uintptr_t)ret)
		return -(int)(uintptr_t)ret;

	return 0;
}

static enum scx_test_status setup(void **ctx)
{
	struct cyclic_kick_wait *skel;

	skel = cyclic_kick_wait__open();
	SCX_FAIL_IF(!skel, "Failed to open skel");
	SCX_ENUM_INIT(skel);

	*ctx = skel;
	return SCX_TEST_PASS;
}

static enum scx_test_status run(void *ctx)
{
	struct cyclic_kick_wait *skel = ctx;
	struct worker_ctx workers[NR_WORKERS] = {};
	struct bpf_link *link = NULL;
	enum scx_test_status status = SCX_TEST_PASS;
	int test_cpus[NR_TEST_CPUS];
	int nr_cpus = 0;
	cpu_set_t mask;
	int ret, i;

	if (sched_getaffinity(0, sizeof(mask), &mask)) {
		SCX_ERR("Failed to get affinity (%d)", errno);
		return SCX_TEST_FAIL;
	}

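	/* Pick the first NR_TEST_CPUS CPUs we may run on; skip if fewer exist. */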
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, &mask))
			test_cpus[nr_cpus++] = i;
		if (nr_cpus == NR_TEST_CPUS)
			break;
	}

	if (nr_cpus < NR_TEST_CPUS)
		return SCX_TEST_SKIP;

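	/* Tell the BPF scheduler which CPUs form the A->B->C->A kick ring. */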
	skel->rodata->test_cpu_a = test_cpus[0];
	skel->rodata->test_cpu_b = test_cpus[1];
	skel->rodata->test_cpu_c = test_cpus[2];

	if (cyclic_kick_wait__load(skel)) {
		SCX_ERR("Failed to load skel");
		return SCX_TEST_FAIL;
	}

	link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops);
	if (!link) {
		SCX_ERR("Failed to attach scheduler");
		return SCX_TEST_FAIL;
	}

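	/* Distribute WORKERS_PER_CPU yielding workers onto each test CPU. */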
	for (i = 0; i < NR_WORKERS; i++)
		workers[i].cpu = test_cpus[i / WORKERS_PER_CPU];

	for (i = 0; i < NR_WORKERS; i++) {
		ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
		if (ret) {
			SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
			status = SCX_TEST_FAIL;
			goto out;
		}
		workers[i].started = true;
	}

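	/*
	 * Let the workers hammer the ring.  If SCX_KICK_WAIT can't make
	 * forward progress, the watchdog aborts the scheduler and the exit
	 * info checked below reports a non-SCX_EXIT_NONE kind.
	 */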
	sleep(5);

	if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) {
		SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)",
			(unsigned long long)skel->data->uei.kind,
			(long long)skel->data->uei.exit_code);
		status = SCX_TEST_FAIL;
	}

out:
	for (i = 0; i < NR_WORKERS; i++)
		workers[i].stop = true;

	for (i = 0; i < NR_WORKERS; i++) {
		ret = join_worker(&workers[i]);
		if (ret && status == SCX_TEST_PASS) {
			SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
			status = SCX_TEST_FAIL;
		}
	}

	if (link)
		bpf_link__destroy(link);

	return status;
}

static void cleanup(void *ctx)
{
	struct cyclic_kick_wait *skel = ctx;

	cyclic_kick_wait__destroy(skel);
}

struct scx_test cyclic_kick_wait = {
	.name = "cyclic_kick_wait",
	.description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle",
	.setup = setup,
	.run = run,
	.cleanup = cleanup,
};
REGISTER_SCX_TEST(&cyclic_kick_wait)