1*658ad225SAndrea Righi // SPDX-License-Identifier: GPL-2.0 2*658ad225SAndrea Righi /* 3*658ad225SAndrea Righi * Copyright (c) 2025 NVIDIA Corporation. 4*658ad225SAndrea Righi */ 5*658ad225SAndrea Righi #define _GNU_SOURCE 6*658ad225SAndrea Righi #include <stdio.h> 7*658ad225SAndrea Righi #include <unistd.h> 8*658ad225SAndrea Righi #include <signal.h> 9*658ad225SAndrea Righi #include <time.h> 10*658ad225SAndrea Righi #include <bpf/bpf.h> 11*658ad225SAndrea Righi #include <scx/common.h> 12*658ad225SAndrea Righi #include <sys/wait.h> 13*658ad225SAndrea Righi #include <sched.h> 14*658ad225SAndrea Righi #include <pthread.h> 15*658ad225SAndrea Righi #include "scx_test.h" 16*658ad225SAndrea Righi #include "dequeue.bpf.skel.h" 17*658ad225SAndrea Righi 18*658ad225SAndrea Righi #define NUM_WORKERS 8 19*658ad225SAndrea Righi #define AFFINITY_HAMMER_MS 500 20*658ad225SAndrea Righi 21*658ad225SAndrea Righi /* 22*658ad225SAndrea Righi * Worker function that creates enqueue/dequeue events via CPU work and 23*658ad225SAndrea Righi * sleep. 24*658ad225SAndrea Righi */ 25*658ad225SAndrea Righi static void worker_fn(int id) 26*658ad225SAndrea Righi { 27*658ad225SAndrea Righi int i; 28*658ad225SAndrea Righi volatile int sum = 0; 29*658ad225SAndrea Righi 30*658ad225SAndrea Righi for (i = 0; i < 1000; i++) { 31*658ad225SAndrea Righi volatile int j; 32*658ad225SAndrea Righi 33*658ad225SAndrea Righi /* Do some work to trigger scheduling events */ 34*658ad225SAndrea Righi for (j = 0; j < 10000; j++) 35*658ad225SAndrea Righi sum += j; 36*658ad225SAndrea Righi 37*658ad225SAndrea Righi /* Sleep to trigger dequeue */ 38*658ad225SAndrea Righi usleep(1000 + (id * 100)); 39*658ad225SAndrea Righi } 40*658ad225SAndrea Righi 41*658ad225SAndrea Righi exit(0); 42*658ad225SAndrea Righi } 43*658ad225SAndrea Righi 44*658ad225SAndrea Righi /* 45*658ad225SAndrea Righi * This thread changes workers' affinity from outside so that some changes 46*658ad225SAndrea Righi * hit tasks while they are still in the scheduler's queue and trigger 47*658ad225SAndrea Righi * property-change dequeues. 48*658ad225SAndrea Righi */ 49*658ad225SAndrea Righi static void *affinity_hammer_fn(void *arg) 50*658ad225SAndrea Righi { 51*658ad225SAndrea Righi pid_t *pids = arg; 52*658ad225SAndrea Righi cpu_set_t cpuset; 53*658ad225SAndrea Righi int i = 0, n = NUM_WORKERS; 54*658ad225SAndrea Righi struct timespec start, now; 55*658ad225SAndrea Righi 56*658ad225SAndrea Righi clock_gettime(CLOCK_MONOTONIC, &start); 57*658ad225SAndrea Righi while (1) { 58*658ad225SAndrea Righi int w = i % n; 59*658ad225SAndrea Righi int cpu = (i / n) % 4; 60*658ad225SAndrea Righi 61*658ad225SAndrea Righi CPU_ZERO(&cpuset); 62*658ad225SAndrea Righi CPU_SET(cpu, &cpuset); 63*658ad225SAndrea Righi sched_setaffinity(pids[w], sizeof(cpuset), &cpuset); 64*658ad225SAndrea Righi i++; 65*658ad225SAndrea Righi 66*658ad225SAndrea Righi /* Check elapsed time every 256 iterations to limit gettime cost */ 67*658ad225SAndrea Righi if ((i & 255) == 0) { 68*658ad225SAndrea Righi long long elapsed_ms; 69*658ad225SAndrea Righi 70*658ad225SAndrea Righi clock_gettime(CLOCK_MONOTONIC, &now); 71*658ad225SAndrea Righi elapsed_ms = (now.tv_sec - start.tv_sec) * 1000LL + 72*658ad225SAndrea Righi (now.tv_nsec - start.tv_nsec) / 1000000; 73*658ad225SAndrea Righi if (elapsed_ms >= AFFINITY_HAMMER_MS) 74*658ad225SAndrea Righi break; 75*658ad225SAndrea Righi } 76*658ad225SAndrea Righi } 77*658ad225SAndrea Righi return NULL; 78*658ad225SAndrea Righi } 79*658ad225SAndrea Righi 80*658ad225SAndrea Righi static enum scx_test_status run_scenario(struct dequeue *skel, u32 scenario, 81*658ad225SAndrea Righi const char *scenario_name) 82*658ad225SAndrea Righi { 83*658ad225SAndrea Righi struct bpf_link *link; 84*658ad225SAndrea Righi pid_t pids[NUM_WORKERS]; 85*658ad225SAndrea Righi pthread_t hammer; 86*658ad225SAndrea Righi 87*658ad225SAndrea Righi int i, status; 88*658ad225SAndrea Righi u64 enq_start, deq_start, 89*658ad225SAndrea Righi dispatch_deq_start, change_deq_start, bpf_queue_full_start; 90*658ad225SAndrea Righi u64 enq_delta, deq_delta, 91*658ad225SAndrea Righi dispatch_deq_delta, change_deq_delta, bpf_queue_full_delta; 92*658ad225SAndrea Righi 93*658ad225SAndrea Righi /* Set the test scenario */ 94*658ad225SAndrea Righi skel->bss->test_scenario = scenario; 95*658ad225SAndrea Righi 96*658ad225SAndrea Righi /* Record starting counts */ 97*658ad225SAndrea Righi enq_start = skel->bss->enqueue_cnt; 98*658ad225SAndrea Righi deq_start = skel->bss->dequeue_cnt; 99*658ad225SAndrea Righi dispatch_deq_start = skel->bss->dispatch_dequeue_cnt; 100*658ad225SAndrea Righi change_deq_start = skel->bss->change_dequeue_cnt; 101*658ad225SAndrea Righi bpf_queue_full_start = skel->bss->bpf_queue_full; 102*658ad225SAndrea Righi 103*658ad225SAndrea Righi link = bpf_map__attach_struct_ops(skel->maps.dequeue_ops); 104*658ad225SAndrea Righi SCX_FAIL_IF(!link, "Failed to attach struct_ops for scenario %s", scenario_name); 105*658ad225SAndrea Righi 106*658ad225SAndrea Righi /* Fork worker processes to generate enqueue/dequeue events */ 107*658ad225SAndrea Righi for (i = 0; i < NUM_WORKERS; i++) { 108*658ad225SAndrea Righi pids[i] = fork(); 109*658ad225SAndrea Righi SCX_FAIL_IF(pids[i] < 0, "Failed to fork worker %d", i); 110*658ad225SAndrea Righi 111*658ad225SAndrea Righi if (pids[i] == 0) { 112*658ad225SAndrea Righi worker_fn(i); 113*658ad225SAndrea Righi /* Should not reach here */ 114*658ad225SAndrea Righi exit(1); 115*658ad225SAndrea Righi } 116*658ad225SAndrea Righi } 117*658ad225SAndrea Righi 118*658ad225SAndrea Righi /* 119*658ad225SAndrea Righi * Run an "affinity hammer" so that some property changes hit tasks 120*658ad225SAndrea Righi * while they are still in BPF custody (e.g., in user DSQ or BPF 121*658ad225SAndrea Righi * queue), triggering SCX_DEQ_SCHED_CHANGE dequeues. 122*658ad225SAndrea Righi */ 123*658ad225SAndrea Righi SCX_FAIL_IF(pthread_create(&hammer, NULL, affinity_hammer_fn, pids) != 0, 124*658ad225SAndrea Righi "Failed to create affinity hammer thread"); 125*658ad225SAndrea Righi pthread_join(hammer, NULL); 126*658ad225SAndrea Righi 127*658ad225SAndrea Righi /* Wait for all workers to complete */ 128*658ad225SAndrea Righi for (i = 0; i < NUM_WORKERS; i++) { 129*658ad225SAndrea Righi SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i], 130*658ad225SAndrea Righi "Failed to wait for worker %d", i); 131*658ad225SAndrea Righi SCX_FAIL_IF(status != 0, "Worker %d exited with status %d", i, status); 132*658ad225SAndrea Righi } 133*658ad225SAndrea Righi 134*658ad225SAndrea Righi bpf_link__destroy(link); 135*658ad225SAndrea Righi 136*658ad225SAndrea Righi SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG)); 137*658ad225SAndrea Righi 138*658ad225SAndrea Righi /* Calculate deltas */ 139*658ad225SAndrea Righi enq_delta = skel->bss->enqueue_cnt - enq_start; 140*658ad225SAndrea Righi deq_delta = skel->bss->dequeue_cnt - deq_start; 141*658ad225SAndrea Righi dispatch_deq_delta = skel->bss->dispatch_dequeue_cnt - dispatch_deq_start; 142*658ad225SAndrea Righi change_deq_delta = skel->bss->change_dequeue_cnt - change_deq_start; 143*658ad225SAndrea Righi bpf_queue_full_delta = skel->bss->bpf_queue_full - bpf_queue_full_start; 144*658ad225SAndrea Righi 145*658ad225SAndrea Righi printf("%s:\n", scenario_name); 146*658ad225SAndrea Righi printf(" enqueues: %lu\n", (unsigned long)enq_delta); 147*658ad225SAndrea Righi printf(" dequeues: %lu (dispatch: %lu, property_change: %lu)\n", 148*658ad225SAndrea Righi (unsigned long)deq_delta, 149*658ad225SAndrea Righi (unsigned long)dispatch_deq_delta, 150*658ad225SAndrea Righi (unsigned long)change_deq_delta); 151*658ad225SAndrea Righi printf(" BPF queue full: %lu\n", (unsigned long)bpf_queue_full_delta); 152*658ad225SAndrea Righi 153*658ad225SAndrea Righi /* 154*658ad225SAndrea Righi * Validate enqueue/dequeue lifecycle tracking. 155*658ad225SAndrea Righi * 156*658ad225SAndrea Righi * For scenarios 0, 1, 3, 4 (local and global DSQs from 157*658ad225SAndrea Righi * ops.select_cpu() and ops.enqueue()), both enqueues and dequeues 158*658ad225SAndrea Righi * should be 0 because tasks bypass the BPF scheduler entirely: 159*658ad225SAndrea Righi * tasks never enter BPF scheduler's custody. 160*658ad225SAndrea Righi * 161*658ad225SAndrea Righi * For scenarios 2, 5, 6 (user DSQ or BPF internal queue) we expect 162*658ad225SAndrea Righi * both enqueues and dequeues. 163*658ad225SAndrea Righi * 164*658ad225SAndrea Righi * The BPF code does strict state machine validation with 165*658ad225SAndrea Righi * scx_bpf_error() to ensure the workflow semantics are correct. 166*658ad225SAndrea Righi * 167*658ad225SAndrea Righi * If we reach this point without errors, the semantics are 168*658ad225SAndrea Righi * validated correctly. 169*658ad225SAndrea Righi */ 170*658ad225SAndrea Righi if (scenario == 0 || scenario == 1 || 171*658ad225SAndrea Righi scenario == 3 || scenario == 4) { 172*658ad225SAndrea Righi /* Tasks bypass BPF scheduler completely */ 173*658ad225SAndrea Righi SCX_EQ(enq_delta, 0); 174*658ad225SAndrea Righi SCX_EQ(deq_delta, 0); 175*658ad225SAndrea Righi SCX_EQ(dispatch_deq_delta, 0); 176*658ad225SAndrea Righi SCX_EQ(change_deq_delta, 0); 177*658ad225SAndrea Righi } else { 178*658ad225SAndrea Righi /* 179*658ad225SAndrea Righi * User DSQ from ops.enqueue() or ops.select_cpu(): tasks 180*658ad225SAndrea Righi * enter BPF scheduler's custody. 181*658ad225SAndrea Righi * 182*658ad225SAndrea Righi * Also validate 1:1 enqueue/dequeue pairing. 183*658ad225SAndrea Righi */ 184*658ad225SAndrea Righi SCX_GT(enq_delta, 0); 185*658ad225SAndrea Righi SCX_GT(deq_delta, 0); 186*658ad225SAndrea Righi SCX_EQ(enq_delta, deq_delta); 187*658ad225SAndrea Righi } 188*658ad225SAndrea Righi 189*658ad225SAndrea Righi return SCX_TEST_PASS; 190*658ad225SAndrea Righi } 191*658ad225SAndrea Righi 192*658ad225SAndrea Righi static enum scx_test_status setup(void **ctx) 193*658ad225SAndrea Righi { 194*658ad225SAndrea Righi struct dequeue *skel; 195*658ad225SAndrea Righi 196*658ad225SAndrea Righi skel = dequeue__open(); 197*658ad225SAndrea Righi SCX_FAIL_IF(!skel, "Failed to open skel"); 198*658ad225SAndrea Righi SCX_ENUM_INIT(skel); 199*658ad225SAndrea Righi SCX_FAIL_IF(dequeue__load(skel), "Failed to load skel"); 200*658ad225SAndrea Righi 201*658ad225SAndrea Righi *ctx = skel; 202*658ad225SAndrea Righi 203*658ad225SAndrea Righi return SCX_TEST_PASS; 204*658ad225SAndrea Righi } 205*658ad225SAndrea Righi 206*658ad225SAndrea Righi static enum scx_test_status run(void *ctx) 207*658ad225SAndrea Righi { 208*658ad225SAndrea Righi struct dequeue *skel = ctx; 209*658ad225SAndrea Righi enum scx_test_status status; 210*658ad225SAndrea Righi 211*658ad225SAndrea Righi status = run_scenario(skel, 0, "Scenario 0: Local DSQ from ops.select_cpu()"); 212*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 213*658ad225SAndrea Righi return status; 214*658ad225SAndrea Righi 215*658ad225SAndrea Righi status = run_scenario(skel, 1, "Scenario 1: Global DSQ from ops.select_cpu()"); 216*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 217*658ad225SAndrea Righi return status; 218*658ad225SAndrea Righi 219*658ad225SAndrea Righi status = run_scenario(skel, 2, "Scenario 2: User DSQ from ops.select_cpu()"); 220*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 221*658ad225SAndrea Righi return status; 222*658ad225SAndrea Righi 223*658ad225SAndrea Righi status = run_scenario(skel, 3, "Scenario 3: Local DSQ from ops.enqueue()"); 224*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 225*658ad225SAndrea Righi return status; 226*658ad225SAndrea Righi 227*658ad225SAndrea Righi status = run_scenario(skel, 4, "Scenario 4: Global DSQ from ops.enqueue()"); 228*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 229*658ad225SAndrea Righi return status; 230*658ad225SAndrea Righi 231*658ad225SAndrea Righi status = run_scenario(skel, 5, "Scenario 5: User DSQ from ops.enqueue()"); 232*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 233*658ad225SAndrea Righi return status; 234*658ad225SAndrea Righi 235*658ad225SAndrea Righi status = run_scenario(skel, 6, "Scenario 6: BPF queue from ops.enqueue()"); 236*658ad225SAndrea Righi if (status != SCX_TEST_PASS) 237*658ad225SAndrea Righi return status; 238*658ad225SAndrea Righi 239*658ad225SAndrea Righi printf("\n=== Summary ===\n"); 240*658ad225SAndrea Righi printf("Total enqueues: %lu\n", (unsigned long)skel->bss->enqueue_cnt); 241*658ad225SAndrea Righi printf("Total dequeues: %lu\n", (unsigned long)skel->bss->dequeue_cnt); 242*658ad225SAndrea Righi printf(" Dispatch dequeues: %lu (no flag, normal workflow)\n", 243*658ad225SAndrea Righi (unsigned long)skel->bss->dispatch_dequeue_cnt); 244*658ad225SAndrea Righi printf(" Property change dequeues: %lu (SCX_DEQ_SCHED_CHANGE flag)\n", 245*658ad225SAndrea Righi (unsigned long)skel->bss->change_dequeue_cnt); 246*658ad225SAndrea Righi printf(" BPF queue full: %lu\n", 247*658ad225SAndrea Righi (unsigned long)skel->bss->bpf_queue_full); 248*658ad225SAndrea Righi printf("\nAll scenarios passed - no state machine violations detected\n"); 249*658ad225SAndrea Righi printf("-> Validated: Local DSQ dispatch bypasses BPF scheduler\n"); 250*658ad225SAndrea Righi printf("-> Validated: Global DSQ dispatch bypasses BPF scheduler\n"); 251*658ad225SAndrea Righi printf("-> Validated: User DSQ dispatch triggers ops.dequeue() callbacks\n"); 252*658ad225SAndrea Righi printf("-> Validated: Dispatch dequeues have no flags (normal workflow)\n"); 253*658ad225SAndrea Righi printf("-> Validated: Property change dequeues have SCX_DEQ_SCHED_CHANGE flag\n"); 254*658ad225SAndrea Righi printf("-> Validated: No duplicate enqueues or invalid state transitions\n"); 255*658ad225SAndrea Righi 256*658ad225SAndrea Righi return SCX_TEST_PASS; 257*658ad225SAndrea Righi } 258*658ad225SAndrea Righi 259*658ad225SAndrea Righi static void cleanup(void *ctx) 260*658ad225SAndrea Righi { 261*658ad225SAndrea Righi struct dequeue *skel = ctx; 262*658ad225SAndrea Righi 263*658ad225SAndrea Righi dequeue__destroy(skel); 264*658ad225SAndrea Righi } 265*658ad225SAndrea Righi 266*658ad225SAndrea Righi struct scx_test dequeue_test = { 267*658ad225SAndrea Righi .name = "dequeue", 268*658ad225SAndrea Righi .description = "Verify ops.dequeue() semantics", 269*658ad225SAndrea Righi .setup = setup, 270*658ad225SAndrea Righi .run = run, 271*658ad225SAndrea Righi .cleanup = cleanup, 272*658ad225SAndrea Righi }; 273*658ad225SAndrea Righi 274*658ad225SAndrea Righi REGISTER_SCX_TEST(&dequeue_test) 275