1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Test to verify that total_bw value remains consistent across all CPUs 4 * in different BPF program states. 5 * 6 * Copyright (C) 2025 NVIDIA Corporation. 7 */ 8 #include <bpf/bpf.h> 9 #include <errno.h> 10 #include <pthread.h> 11 #include <scx/common.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include "minimal.bpf.skel.h" 18 #include "scx_test.h" 19 20 #define MAX_CPUS 512 21 #define STRESS_DURATION_SEC 5 22 23 struct total_bw_ctx { 24 struct minimal *skel; 25 long baseline_bw[MAX_CPUS]; 26 int nr_cpus; 27 }; 28 29 static void *cpu_stress_thread(void *arg) 30 { 31 volatile int i; 32 time_t end_time = time(NULL) + STRESS_DURATION_SEC; 33 34 while (time(NULL) < end_time) 35 for (i = 0; i < 1000000; i++) 36 ; 37 38 return NULL; 39 } 40 41 /* 42 * The first enqueue on a CPU causes the DL server to start, for that 43 * reason run stressor threads in the hopes it schedules on all CPUs. 44 */ 45 static int run_cpu_stress(int nr_cpus) 46 { 47 pthread_t *threads; 48 int i, ret = 0; 49 50 threads = calloc(nr_cpus, sizeof(pthread_t)); 51 if (!threads) 52 return -ENOMEM; 53 54 /* Create threads to run on each CPU */ 55 for (i = 0; i < nr_cpus; i++) { 56 if (pthread_create(&threads[i], NULL, cpu_stress_thread, NULL)) { 57 ret = -errno; 58 fprintf(stderr, "Failed to create thread %d: %s\n", i, strerror(-ret)); 59 break; 60 } 61 } 62 63 /* Wait for all threads to complete */ 64 for (i = 0; i < nr_cpus; i++) { 65 if (threads[i]) 66 pthread_join(threads[i], NULL); 67 } 68 69 free(threads); 70 return ret; 71 } 72 73 static int read_total_bw_values(long *bw_values, int max_cpus) 74 { 75 FILE *fp; 76 char line[256]; 77 int cpu_count = 0; 78 79 fp = fopen("/sys/kernel/debug/sched/debug", "r"); 80 if (!fp) { 81 SCX_ERR("Failed to open debug file"); 82 return -1; 83 } 84 85 while (fgets(line, sizeof(line), fp)) { 86 char *bw_str = strstr(line, "total_bw"); 87 88 if (bw_str) { 89 bw_str = strchr(bw_str, ':'); 90 if (bw_str) { 91 /* Only store up to max_cpus values */ 92 if (cpu_count < max_cpus) 93 bw_values[cpu_count] = atol(bw_str + 1); 94 cpu_count++; 95 } 96 } 97 } 98 99 fclose(fp); 100 return cpu_count; 101 } 102 103 static bool verify_total_bw_consistency(long *bw_values, int count) 104 { 105 int i; 106 long first_value; 107 108 if (count <= 0) 109 return false; 110 111 first_value = bw_values[0]; 112 113 for (i = 1; i < count; i++) { 114 if (bw_values[i] != first_value) { 115 SCX_ERR("Inconsistent total_bw: CPU0=%ld, CPU%d=%ld", 116 first_value, i, bw_values[i]); 117 return false; 118 } 119 } 120 121 return true; 122 } 123 124 static int fetch_verify_total_bw(long *bw_values, int nr_cpus) 125 { 126 int attempts = 0; 127 int max_attempts = 10; 128 int count; 129 130 /* 131 * The first enqueue on a CPU causes the DL server to start, for that 132 * reason run stressor threads in the hopes it schedules on all CPUs. 133 */ 134 if (run_cpu_stress(nr_cpus) < 0) { 135 SCX_ERR("Failed to run CPU stress"); 136 return -1; 137 } 138 139 /* Try multiple times to get stable values */ 140 while (attempts < max_attempts) { 141 count = read_total_bw_values(bw_values, nr_cpus); 142 fprintf(stderr, "Read %d total_bw values (testing %d CPUs)\n", count, nr_cpus); 143 /* If system has more CPUs than we're testing, that's OK */ 144 if (count < nr_cpus) { 145 SCX_ERR("Expected at least %d CPUs, got %d", nr_cpus, count); 146 attempts++; 147 sleep(1); 148 continue; 149 } 150 151 /* Only verify the CPUs we're testing */ 152 if (verify_total_bw_consistency(bw_values, nr_cpus)) { 153 fprintf(stderr, "Values are consistent: %ld\n", bw_values[0]); 154 return 0; 155 } 156 157 attempts++; 158 sleep(1); 159 } 160 161 return -1; 162 } 163 164 static enum scx_test_status setup(void **ctx) 165 { 166 struct total_bw_ctx *test_ctx; 167 168 if (access("/sys/kernel/debug/sched/debug", R_OK) != 0) { 169 fprintf(stderr, "Skipping test: debugfs sched/debug not accessible\n"); 170 return SCX_TEST_SKIP; 171 } 172 173 test_ctx = calloc(1, sizeof(*test_ctx)); 174 if (!test_ctx) 175 return SCX_TEST_FAIL; 176 177 test_ctx->nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 178 if (test_ctx->nr_cpus <= 0) { 179 free(test_ctx); 180 return SCX_TEST_FAIL; 181 } 182 183 /* If system has more CPUs than MAX_CPUS, just test the first MAX_CPUS */ 184 if (test_ctx->nr_cpus > MAX_CPUS) 185 test_ctx->nr_cpus = MAX_CPUS; 186 187 /* Test scenario 1: BPF program not loaded */ 188 /* Read and verify baseline total_bw before loading BPF program */ 189 fprintf(stderr, "BPF prog initially not loaded, reading total_bw values\n"); 190 if (fetch_verify_total_bw(test_ctx->baseline_bw, test_ctx->nr_cpus) < 0) { 191 SCX_ERR("Failed to get stable baseline values"); 192 free(test_ctx); 193 return SCX_TEST_FAIL; 194 } 195 196 /* Load the BPF skeleton */ 197 test_ctx->skel = minimal__open(); 198 if (!test_ctx->skel) { 199 free(test_ctx); 200 return SCX_TEST_FAIL; 201 } 202 203 SCX_ENUM_INIT(test_ctx->skel); 204 if (minimal__load(test_ctx->skel)) { 205 minimal__destroy(test_ctx->skel); 206 free(test_ctx); 207 return SCX_TEST_FAIL; 208 } 209 210 *ctx = test_ctx; 211 return SCX_TEST_PASS; 212 } 213 214 static enum scx_test_status run(void *ctx) 215 { 216 struct total_bw_ctx *test_ctx = ctx; 217 struct bpf_link *link; 218 long loaded_bw[MAX_CPUS]; 219 long unloaded_bw[MAX_CPUS]; 220 int i; 221 222 /* Test scenario 2: BPF program loaded */ 223 link = bpf_map__attach_struct_ops(test_ctx->skel->maps.minimal_ops); 224 if (!link) { 225 SCX_ERR("Failed to attach scheduler"); 226 return SCX_TEST_FAIL; 227 } 228 229 fprintf(stderr, "BPF program loaded, reading total_bw values\n"); 230 if (fetch_verify_total_bw(loaded_bw, test_ctx->nr_cpus) < 0) { 231 SCX_ERR("Failed to get stable values with BPF loaded"); 232 bpf_link__destroy(link); 233 return SCX_TEST_FAIL; 234 } 235 bpf_link__destroy(link); 236 237 /* Test scenario 3: BPF program unloaded */ 238 fprintf(stderr, "BPF program unloaded, reading total_bw values\n"); 239 if (fetch_verify_total_bw(unloaded_bw, test_ctx->nr_cpus) < 0) { 240 SCX_ERR("Failed to get stable values after BPF unload"); 241 return SCX_TEST_FAIL; 242 } 243 244 /* Verify all three scenarios have the same total_bw values */ 245 for (i = 0; i < test_ctx->nr_cpus; i++) { 246 if (test_ctx->baseline_bw[i] != loaded_bw[i]) { 247 SCX_ERR("CPU%d: baseline_bw=%ld != loaded_bw=%ld", 248 i, test_ctx->baseline_bw[i], loaded_bw[i]); 249 return SCX_TEST_FAIL; 250 } 251 252 if (test_ctx->baseline_bw[i] != unloaded_bw[i]) { 253 SCX_ERR("CPU%d: baseline_bw=%ld != unloaded_bw=%ld", 254 i, test_ctx->baseline_bw[i], unloaded_bw[i]); 255 return SCX_TEST_FAIL; 256 } 257 } 258 259 fprintf(stderr, "All total_bw values are consistent across all scenarios\n"); 260 return SCX_TEST_PASS; 261 } 262 263 static void cleanup(void *ctx) 264 { 265 struct total_bw_ctx *test_ctx = ctx; 266 267 if (test_ctx) { 268 if (test_ctx->skel) 269 minimal__destroy(test_ctx->skel); 270 free(test_ctx); 271 } 272 } 273 274 struct scx_test total_bw = { 275 .name = "total_bw", 276 .description = "Verify total_bw consistency across BPF program states", 277 .setup = setup, 278 .run = run, 279 .cleanup = cleanup, 280 }; 281 REGISTER_SCX_TEST(&total_bw) 282