xref: /linux/tools/testing/selftests/sched_ext/total_bw.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Test to verify that total_bw value remains consistent across all CPUs
4  * in different BPF program states.
5  *
6  * Copyright (C) 2025 NVIDIA Corporation.
7  */
8 #include <bpf/bpf.h>
9 #include <errno.h>
10 #include <pthread.h>
11 #include <scx/common.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include "minimal.bpf.skel.h"
18 #include "scx_test.h"
19 
20 #define MAX_CPUS 512
21 #define STRESS_DURATION_SEC 5
22 
/* Shared state handed from setup() to run() and cleanup() via *ctx. */
struct total_bw_ctx {
	struct minimal *skel;		/* loaded minimal scheduler BPF skeleton */
	long baseline_bw[MAX_CPUS];	/* per-CPU total_bw sampled before any BPF prog is loaded */
	int nr_cpus;			/* number of CPUs under test, capped at MAX_CPUS */
};
28 
29 static void *cpu_stress_thread(void *arg)
30 {
31 	volatile int i;
32 	time_t end_time = time(NULL) + STRESS_DURATION_SEC;
33 
34 	while (time(NULL) < end_time)
35 		for (i = 0; i < 1000000; i++)
36 			;
37 
38 	return NULL;
39 }
40 
41 /*
42  * The first enqueue on a CPU causes the DL server to start, for that
43  * reason run stressor threads in the hopes it schedules on all CPUs.
44  */
45 static int run_cpu_stress(int nr_cpus)
46 {
47 	pthread_t *threads;
48 	int i, ret = 0;
49 
50 	threads = calloc(nr_cpus, sizeof(pthread_t));
51 	if (!threads)
52 		return -ENOMEM;
53 
54 	/* Create threads to run on each CPU */
55 	for (i = 0; i < nr_cpus; i++) {
56 		if (pthread_create(&threads[i], NULL, cpu_stress_thread, NULL)) {
57 			ret = -errno;
58 			fprintf(stderr, "Failed to create thread %d: %s\n", i, strerror(-ret));
59 			break;
60 		}
61 	}
62 
63 	/* Wait for all threads to complete */
64 	for (i = 0; i < nr_cpus; i++) {
65 		if (threads[i])
66 			pthread_join(threads[i], NULL);
67 	}
68 
69 	free(threads);
70 	return ret;
71 }
72 
/*
 * Scan /sys/kernel/debug/sched/debug for "total_bw" entries, one per CPU.
 * Stores at most max_cpus parsed values into bw_values but keeps counting
 * every match, so the return value is the total number of entries seen
 * (which may exceed max_cpus), or -1 if the debug file cannot be opened.
 */
static int read_total_bw_values(long *bw_values, int max_cpus)
{
	char buf[256];
	int nr_seen = 0;
	FILE *fp;

	fp = fopen("/sys/kernel/debug/sched/debug", "r");
	if (!fp) {
		SCX_ERR("Failed to open debug file");
		return -1;
	}

	while (fgets(buf, sizeof(buf), fp)) {
		char *p = strstr(buf, "total_bw");

		if (!p)
			continue;

		p = strchr(p, ':');
		if (!p)
			continue;

		/* Only store up to max_cpus values */
		if (nr_seen < max_cpus)
			bw_values[nr_seen] = atol(p + 1);
		nr_seen++;
	}

	fclose(fp);
	return nr_seen;
}
102 
/*
 * Check that every entry in bw_values[0..count) holds the same value.
 * Returns true when all entries match; false (with an error logged) on
 * the first mismatch, or when count is non-positive.
 */
static bool verify_total_bw_consistency(long *bw_values, int count)
{
	long expected;
	int idx;

	if (count <= 0)
		return false;

	expected = bw_values[0];
	for (idx = 1; idx < count; idx++) {
		if (bw_values[idx] == expected)
			continue;

		SCX_ERR("Inconsistent total_bw: CPU0=%ld, CPU%d=%ld",
			expected, idx, bw_values[idx]);
		return false;
	}

	return true;
}
123 
/*
 * Stress every CPU, then repeatedly sample total_bw until all nr_cpus
 * values agree. Returns 0 once a consistent snapshot lands in bw_values,
 * -1 if stressing fails or the values never settle.
 */
static int fetch_verify_total_bw(long *bw_values, int nr_cpus)
{
	const int max_attempts = 10;
	int attempt, count;

	/*
	 * The first enqueue on a CPU causes the DL server to start, for that
	 * reason run stressor threads in the hopes it schedules on all CPUs.
	 */
	if (run_cpu_stress(nr_cpus) < 0) {
		SCX_ERR("Failed to run CPU stress");
		return -1;
	}

	/* Try multiple times to get stable values */
	for (attempt = 0; attempt < max_attempts; attempt++) {
		count = read_total_bw_values(bw_values, nr_cpus);
		fprintf(stderr, "Read %d total_bw values (testing %d CPUs)\n", count, nr_cpus);

		/* If system has more CPUs than we're testing, that's OK */
		if (count < nr_cpus) {
			SCX_ERR("Expected at least %d CPUs, got %d", nr_cpus, count);
			sleep(1);
			continue;
		}

		/* Only verify the CPUs we're testing */
		if (verify_total_bw_consistency(bw_values, nr_cpus)) {
			fprintf(stderr, "Values are consistent: %ld\n", bw_values[0]);
			return 0;
		}

		sleep(1);
	}

	return -1;
}
163 
164 static enum scx_test_status setup(void **ctx)
165 {
166 	struct total_bw_ctx *test_ctx;
167 
168 	if (access("/sys/kernel/debug/sched/debug", R_OK) != 0) {
169 		fprintf(stderr, "Skipping test: debugfs sched/debug not accessible\n");
170 		return SCX_TEST_SKIP;
171 	}
172 
173 	test_ctx = calloc(1, sizeof(*test_ctx));
174 	if (!test_ctx)
175 		return SCX_TEST_FAIL;
176 
177 	test_ctx->nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
178 	if (test_ctx->nr_cpus <= 0) {
179 		free(test_ctx);
180 		return SCX_TEST_FAIL;
181 	}
182 
183 	/* If system has more CPUs than MAX_CPUS, just test the first MAX_CPUS */
184 	if (test_ctx->nr_cpus > MAX_CPUS)
185 		test_ctx->nr_cpus = MAX_CPUS;
186 
187 	/* Test scenario 1: BPF program not loaded */
188 	/* Read and verify baseline total_bw before loading BPF program */
189 	fprintf(stderr, "BPF prog initially not loaded, reading total_bw values\n");
190 	if (fetch_verify_total_bw(test_ctx->baseline_bw, test_ctx->nr_cpus) < 0) {
191 		SCX_ERR("Failed to get stable baseline values");
192 		free(test_ctx);
193 		return SCX_TEST_FAIL;
194 	}
195 
196 	/* Load the BPF skeleton */
197 	test_ctx->skel = minimal__open();
198 	if (!test_ctx->skel) {
199 		free(test_ctx);
200 		return SCX_TEST_FAIL;
201 	}
202 
203 	SCX_ENUM_INIT(test_ctx->skel);
204 	if (minimal__load(test_ctx->skel)) {
205 		minimal__destroy(test_ctx->skel);
206 		free(test_ctx);
207 		return SCX_TEST_FAIL;
208 	}
209 
210 	*ctx = test_ctx;
211 	return SCX_TEST_PASS;
212 }
213 
214 static enum scx_test_status run(void *ctx)
215 {
216 	struct total_bw_ctx *test_ctx = ctx;
217 	struct bpf_link *link;
218 	long loaded_bw[MAX_CPUS];
219 	long unloaded_bw[MAX_CPUS];
220 	int i;
221 
222 	/* Test scenario 2: BPF program loaded */
223 	link = bpf_map__attach_struct_ops(test_ctx->skel->maps.minimal_ops);
224 	if (!link) {
225 		SCX_ERR("Failed to attach scheduler");
226 		return SCX_TEST_FAIL;
227 	}
228 
229 	fprintf(stderr, "BPF program loaded, reading total_bw values\n");
230 	if (fetch_verify_total_bw(loaded_bw, test_ctx->nr_cpus) < 0) {
231 		SCX_ERR("Failed to get stable values with BPF loaded");
232 		bpf_link__destroy(link);
233 		return SCX_TEST_FAIL;
234 	}
235 	bpf_link__destroy(link);
236 
237 	/* Test scenario 3: BPF program unloaded */
238 	fprintf(stderr, "BPF program unloaded, reading total_bw values\n");
239 	if (fetch_verify_total_bw(unloaded_bw, test_ctx->nr_cpus) < 0) {
240 		SCX_ERR("Failed to get stable values after BPF unload");
241 		return SCX_TEST_FAIL;
242 	}
243 
244 	/* Verify all three scenarios have the same total_bw values */
245 	for (i = 0; i < test_ctx->nr_cpus; i++) {
246 		if (test_ctx->baseline_bw[i] != loaded_bw[i]) {
247 			SCX_ERR("CPU%d: baseline_bw=%ld != loaded_bw=%ld",
248 				i, test_ctx->baseline_bw[i], loaded_bw[i]);
249 			return SCX_TEST_FAIL;
250 		}
251 
252 		if (test_ctx->baseline_bw[i] != unloaded_bw[i]) {
253 			SCX_ERR("CPU%d: baseline_bw=%ld != unloaded_bw=%ld",
254 				i, test_ctx->baseline_bw[i], unloaded_bw[i]);
255 			return SCX_TEST_FAIL;
256 		}
257 	}
258 
259 	fprintf(stderr, "All total_bw values are consistent across all scenarios\n");
260 	return SCX_TEST_PASS;
261 }
262 
263 static void cleanup(void *ctx)
264 {
265 	struct total_bw_ctx *test_ctx = ctx;
266 
267 	if (test_ctx) {
268 		if (test_ctx->skel)
269 			minimal__destroy(test_ctx->skel);
270 		free(test_ctx);
271 	}
272 }
273 
/* Test descriptor wiring the setup/run/cleanup hooks into the harness. */
struct scx_test total_bw = {
	.name = "total_bw",
	.description = "Verify total_bw consistency across BPF program states",
	.setup = setup,
	.run = run,
	.cleanup = cleanup,
};
REGISTER_SCX_TEST(&total_bw)
282