xref: /linux/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c (revision d30c1683aaecb93d2ab95685dc4300a33d3cea7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3 #include <linux/sched.h>
4 #include <linux/smp.h>
5 #include <linux/delay.h>
6 #include <linux/module.h>
7 #include <linux/prandom.h>
8 #include <linux/ktime.h>
9 #include <asm/rqspinlock.h>
10 #include <linux/perf_event.h>
11 #include <linux/kthread.h>
12 #include <linux/atomic.h>
13 #include <linux/slab.h>
14 
/*
 * Hardware perf event used to drive NMIs on each non-boot CPU.
 * A 100k CPU-cycle sample period makes the overflow handler (nmi_cb)
 * fire frequently enough to interrupt the workers mid-critical-section.
 * Created disabled; the boot-CPU controller enables the events once
 * all workers have checked in.
 */
static struct perf_event_attr hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
	.sample_period	= 100000,
};
23 
/* The three locks used to build the AA, ABBA and ABBCCA cycles. */
static rqspinlock_t lock_a;
static rqspinlock_t lock_b;
static rqspinlock_t lock_c;

/* Acquisitions slower than this get a per-bucket histogram dump. */
#define RQSL_SLOW_THRESHOLD_MS 10
/* Inclusive upper bounds (ms) of the latency histogram buckets. */
static const unsigned int rqsl_hist_ms[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
	100, 150, 200, 250, 1000,
};
#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
35 
/* Context in which a lock attempt was made; indexes the stats arrays. */
enum rqsl_context {
	RQSL_CTX_NORMAL = 0,	/* worker kthread */
	RQSL_CTX_NMI,		/* perf NMI overflow handler */
	RQSL_CTX_MAX,
};

/* Per-CPU acquisition statistics, split by context. */
struct rqsl_cpu_hist {
	/* Latency histogram; bucket bounds come from rqsl_hist_ms[]. */
	atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
	/* Attempts where raw_res_spin_lock_irqsave() returned 0. */
	atomic64_t success[RQSL_CTX_MAX];
	/* Attempts where it returned non-zero (lock not acquired). */
	atomic64_t failure[RQSL_CTX_MAX];
};

static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
49 
/* Deadlock pattern to construct; see rqsl_get_lock_pair(). */
enum rqsl_mode {
	RQSL_MODE_AA = 0,	/* same lock in worker and NMI on each CPU */
	RQSL_MODE_ABBA,		/* two-lock cycle, CPUs split odd/even */
	RQSL_MODE_ABBCCA,	/* three-lock cycle, CPUs split by cpu % 3 */
};

static int test_mode = RQSL_MODE_AA;
module_param(test_mode, int, 0644);
MODULE_PARM_DESC(test_mode,
		 "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");

/* Critical-section hold time (ms) in the worker threads. */
static int normal_delay = 20;
module_param(normal_delay, int, 0644);
MODULE_PARM_DESC(normal_delay,
		 "rqspinlock critical section length for normal context (20ms default)");

/* Critical-section hold time (ms) in the NMI handler. */
static int nmi_delay = 10;
module_param(nmi_delay, int, 0644);
MODULE_PARM_DESC(nmi_delay,
		 "rqspinlock critical section length for NMI context (10ms default)");

/* Perf events driving NMIs, one per non-boot CPU (rqsl_nevts entries). */
static struct perf_event **rqsl_evts;
static int rqsl_nevts;

/* Worker kthreads, indexed by CPU id (rqsl_nthreads entries). */
static struct task_struct **rqsl_threads;
static int rqsl_nthreads;
/* Count of non-boot workers that have started; gates perf enable. */
static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);

/* Set to 1 by the boot-CPU controller to stop further lock attempts. */
static int pause = 0;

/* Printable names for enum rqsl_mode, used in the init banner. */
static const char *rqsl_mode_names[] = {
	[RQSL_MODE_AA] = "AA",
	[RQSL_MODE_ABBA] = "ABBA",
	[RQSL_MODE_ABBCCA] = "ABBCCA",
};

/* Locks a CPU uses: worker_lock in the kthread, nmi_lock in its NMI. */
struct rqsl_lock_pair {
	rqspinlock_t *worker_lock;
	rqspinlock_t *nmi_lock;
};
90 
91 static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
92 {
93 	int mode = READ_ONCE(test_mode);
94 
95 	switch (mode) {
96 	default:
97 	case RQSL_MODE_AA:
98 		return (struct rqsl_lock_pair){ &lock_a, &lock_a };
99 	case RQSL_MODE_ABBA:
100 		if (cpu & 1)
101 			return (struct rqsl_lock_pair){ &lock_b, &lock_a };
102 		return (struct rqsl_lock_pair){ &lock_a, &lock_b };
103 	case RQSL_MODE_ABBCCA:
104 		switch (cpu % 3) {
105 		case 0:
106 			return (struct rqsl_lock_pair){ &lock_a, &lock_b };
107 		case 1:
108 			return (struct rqsl_lock_pair){ &lock_b, &lock_c };
109 		default:
110 			return (struct rqsl_lock_pair){ &lock_c, &lock_a };
111 		}
112 	}
113 }
114 
115 static u32 rqsl_hist_bucket_idx(u32 delta_ms)
116 {
117 	int i;
118 
119 	for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
120 		if (delta_ms <= rqsl_hist_ms[i])
121 			return i;
122 	}
123 
124 	return RQSL_NR_HIST_BUCKETS - 1;
125 }
126 
127 static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
128 {
129 	struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
130 	u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
131 	u32 bucket = rqsl_hist_bucket_idx(delta_ms);
132 	atomic64_t *buckets = hist->hist[ctx];
133 
134 	atomic64_inc(&buckets[bucket]);
135 	if (!ret)
136 		atomic64_inc(&hist->success[ctx]);
137 	else
138 		atomic64_inc(&hist->failure[ctx]);
139 }
140 
/*
 * Per-CPU kthread, bound to one CPU at creation time.
 *
 * Non-boot CPUs are workers: they repeatedly take their mode-dependent
 * worker_lock and hold it for normal_delay ms, so the perf NMI on the
 * same CPU can interrupt them mid-hold and attempt the paired nmi_lock,
 * forming the contention cycle under test.
 *
 * The boot CPU (cpu == 0) is the controller: once every other worker
 * has checked in via rqsl_ready_cpus, it enables the perf events, lets
 * the test run for 5 seconds, then sets 'pause' to stop acquisitions.
 */
static int rqspinlock_worker_fn(void *arg)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	u64 start_ns;
	int ret;

	if (cpu) {
		/* Worker path: signal readiness to the controller. */
		atomic_inc(&rqsl_ready_cpus);

		while (!kthread_should_stop()) {
			/* Re-read each round: test_mode is writable at runtime. */
			struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu);
			rqspinlock_t *worker_lock = locks.worker_lock;

			if (READ_ONCE(pause)) {
				msleep(1000);
				continue;
			}
			start_ns = ktime_get_mono_fast_ns();
			ret = raw_res_spin_lock_irqsave(worker_lock, flags);
			rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
						RQSL_CTX_NORMAL, ret);
			/*
			 * Simulate the critical-section length; note the delay
			 * runs even when the acquisition failed, and the lock
			 * is only released if it was actually taken.
			 */
			mdelay(normal_delay);
			if (!ret)
				raw_res_spin_unlock_irqrestore(worker_lock, flags);
			cpu_relax();
		}
		return 0;
	}

	/* Controller path (boot CPU). */
	while (!kthread_should_stop()) {
		int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
		int ready = atomic_read(&rqsl_ready_cpus);

		if (ready == expected && !READ_ONCE(pause)) {
			/* All workers are up: start the NMI storm. */
			for (int i = 0; i < rqsl_nevts; i++)
				perf_event_enable(rqsl_evts[i]);
			pr_err("Waiting 5 secs to pause the test\n");
			msleep(1000 * 5);
			WRITE_ONCE(pause, 1);
			pr_err("Paused the test\n");
		} else {
			msleep(1000);
			cpu_relax();
		}
	}
	return 0;
}
189 
/*
 * Perf sample-overflow callback, invoked in NMI context on each
 * non-boot CPU. Attempts this CPU's nmi_lock — potentially while the
 * interrupted worker already holds worker_lock — to complete the
 * AA/ABBA/ABBCCA cycle, recording latency and outcome under
 * RQSL_CTX_NMI. Does nothing on the boot CPU or while paused.
 */
static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
		   struct pt_regs *regs)
{
	struct rqsl_lock_pair locks;
	int cpu = smp_processor_id();
	unsigned long flags;
	u64 start_ns;
	int ret;

	if (!cpu || READ_ONCE(pause))
		return;

	locks = rqsl_get_lock_pair(cpu);
	start_ns = ktime_get_mono_fast_ns();
	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
	rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
				RQSL_CTX_NMI, ret);

	/* Hold (or, on failure, just delay) for nmi_delay ms. */
	mdelay(nmi_delay);

	if (!ret)
		raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
}
213 
214 static void free_rqsl_threads(void)
215 {
216 	int i;
217 
218 	if (rqsl_threads) {
219 		for_each_online_cpu(i) {
220 			if (rqsl_threads[i])
221 				kthread_stop(rqsl_threads[i]);
222 		}
223 		kfree(rqsl_threads);
224 	}
225 }
226 
227 static void free_rqsl_evts(void)
228 {
229 	int i;
230 
231 	if (rqsl_evts) {
232 		for (i = 0; i < rqsl_nevts; i++) {
233 			if (rqsl_evts[i])
234 				perf_event_release_kernel(rqsl_evts[i]);
235 		}
236 		kfree(rqsl_evts);
237 	}
238 }
239 
/*
 * Module init: initialize the three locks, create one NMI-driving perf
 * event per non-boot CPU, and spawn one bound worker thread per online
 * CPU (the boot CPU's thread acts as the controller).
 *
 * Returns 0 on success, -EINVAL for an out-of-range test_mode,
 * -ENOTSUPP when there are too few CPUs for the chosen mode
 * (mode N needs N + 2 online CPUs), or a negative errno from
 * allocation / perf / kthread creation failures.
 */
static int bpf_test_rqspinlock_init(void)
{
	int i, ret;
	int ncpus = num_online_cpus();

	if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) {
		pr_err("Invalid mode %d\n", test_mode);
		return -EINVAL;
	}

	pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);

	/* AA needs 2 CPUs, ABBA 3, ABBCCA 4 (boot CPU only controls). */
	if (ncpus < test_mode + 2)
		return -ENOTSUPP;

	raw_res_spin_lock_init(&lock_a);
	raw_res_spin_lock_init(&lock_b);
	raw_res_spin_lock_init(&lock_c);

	/* One perf event per non-boot CPU; the boot CPU takes no NMIs. */
	rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
	if (!rqsl_evts)
		return -ENOMEM;
	rqsl_nevts = ncpus - 1;

	/* NOTE(review): loops over CPU ids 1..ncpus-1, assuming online
	 * CPU ids are contiguous from 0; a sparse cpu_online_mask would
	 * target the wrong CPUs here and overflow rqsl_threads[] below.
	 */
	for (i = 1; i < ncpus; i++) {
		struct perf_event *e;

		e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
		if (IS_ERR(e)) {
			ret = PTR_ERR(e);
			goto err_perf_events;
		}
		rqsl_evts[i - 1] = e;
	}

	rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
	if (!rqsl_threads) {
		ret = -ENOMEM;
		goto err_perf_events;
	}
	rqsl_nthreads = ncpus;

	for_each_online_cpu(i) {
		struct task_struct *t;

		t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
		if (IS_ERR(t)) {
			ret = PTR_ERR(t);
			goto err_threads_create;
		}
		/* Pin each worker so a CPU exercises a fixed lock pair. */
		kthread_bind(t, i);
		rqsl_threads[i] = t;
		wake_up_process(t);
	}
	return 0;

err_threads_create:
	free_rqsl_threads();
err_perf_events:
	free_rqsl_evts();
	return ret;
}

module_init(bpf_test_rqspinlock_init);
304 
/*
 * Dump per-CPU latency histograms and success/failure counters to the
 * kernel log. CPUs whose samples all fall within RQSL_SLOW_THRESHOLD_MS
 * get a one-line summary; CPUs with slower acquisitions also get a
 * per-bucket breakdown. CPUs with no samples are skipped.
 */
static void rqsl_print_histograms(void)
{
	int cpu, i;

	pr_err("rqspinlock acquisition latency histogram (ms):\n");

	for_each_online_cpu(cpu) {
		struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
		u64 total_counts[RQSL_NR_HIST_BUCKETS];
		u64 norm_success, nmi_success, success_total;
		u64 norm_failure, nmi_failure, failure_total;
		u64 norm_total = 0, nmi_total = 0, total = 0;
		bool has_slow = false;

		/* Snapshot the atomics and aggregate per-context totals. */
		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
			norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
			nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
			total_counts[i] = norm_counts[i] + nmi_counts[i];
			norm_total += norm_counts[i];
			nmi_total += nmi_counts[i];
			total += total_counts[i];
			if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
			    total_counts[i])
				has_slow = true;
		}

		norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
		nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
		norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
		nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
		success_total = norm_success + nmi_success;
		failure_total = norm_failure + nmi_failure;

		/* Nothing recorded on this CPU (e.g. the boot CPU). */
		if (!total)
			continue;

		if (!has_slow) {
			/* All samples fast: summary line only, no buckets. */
			pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
			       "success %llu (normal %llu, nmi %llu) | "
			       "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
			       cpu, total, norm_total, nmi_total,
			       success_total, norm_success, nmi_success,
			       failure_total, norm_failure, nmi_failure,
			       RQSL_SLOW_THRESHOLD_MS);
			continue;
		}

		pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
		       "success %llu (normal %llu, nmi %llu) | "
		       "failure %llu (normal %llu, nmi %llu)\n",
		       cpu, total, norm_total, nmi_total,
		       success_total, norm_success, nmi_success,
		       failure_total, norm_failure, nmi_failure);
		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
			unsigned int start_ms;

			if (!total_counts[i])
				continue;

			/* Bucket i covers (rqsl_hist_ms[i-1], rqsl_hist_ms[i]] ms. */
			start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
			if (i == RQSL_NR_HIST_BUCKETS - 1) {
				pr_err("   >= %ums: total %llu (normal %llu, nmi %llu)\n",
				       start_ms, total_counts[i],
				       norm_counts[i], nmi_counts[i]);
			} else {
				pr_err("   %u-%ums: total %llu (normal %llu, nmi %llu)\n",
				       start_ms, rqsl_hist_ms[i],
				       total_counts[i],
				       norm_counts[i], nmi_counts[i]);
			}
		}
	}
}
380 
/*
 * Module unload: quiesce, tear down, then report.
 * 'pause' is set first so workers and NMI handlers stop attempting
 * locks, then the threads are stopped and the perf events released
 * before the accumulated statistics are printed.
 */
static void bpf_test_rqspinlock_exit(void)
{
	WRITE_ONCE(pause, 1);
	free_rqsl_threads();
	free_rqsl_evts();
	rqsl_print_histograms();
}

module_exit(bpf_test_rqspinlock_exit);
390 
391 MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
392 MODULE_DESCRIPTION("BPF rqspinlock stress test module");
393 MODULE_LICENSE("GPL");
394