xref: /linux/tools/sched_ext/scx_central.c (revision 3a39d672e7f48b8d6b91a09afa4b55352773b4b5)
1037df2a3STejun Heo /* SPDX-License-Identifier: GPL-2.0 */
2037df2a3STejun Heo /*
3037df2a3STejun Heo  * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
4037df2a3STejun Heo  * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
5037df2a3STejun Heo  * Copyright (c) 2022 David Vernet <dvernet@meta.com>
6037df2a3STejun Heo  */
7037df2a3STejun Heo #define _GNU_SOURCE
8037df2a3STejun Heo #include <sched.h>
9037df2a3STejun Heo #include <stdio.h>
10037df2a3STejun Heo #include <unistd.h>
11037df2a3STejun Heo #include <inttypes.h>
12037df2a3STejun Heo #include <signal.h>
13037df2a3STejun Heo #include <libgen.h>
14037df2a3STejun Heo #include <bpf/bpf.h>
15037df2a3STejun Heo #include <scx/common.h>
16037df2a3STejun Heo #include "scx_central.bpf.skel.h"
17037df2a3STejun Heo 
/*
 * Usage text printed for -h and for any unrecognized option. The single %s
 * is filled with the program's basename.
 */
const char help_fmt[] =
"A central FIFO sched_ext scheduler.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-c CPU]\n"
"\n"
"  -s SLICE_US   Override slice duration\n"
"  -c CPU        Override the central CPU (default: 0)\n"
"  -v            Print libbpf debug messages\n"
"  -h            Display this help and exit\n";

/* Set by -v; when false, libbpf debug-level messages are dropped. */
static bool verbose;

/*
 * Set from the SIGINT/SIGTERM handler and polled by the main loop.
 * volatile sig_atomic_t is the only plain object type the C standard
 * guarantees can be safely assigned from an asynchronous signal handler.
 */
static volatile sig_atomic_t exit_req;

32037df2a3STejun Heo 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)33037df2a3STejun Heo static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
34037df2a3STejun Heo {
35037df2a3STejun Heo 	if (level == LIBBPF_DEBUG && !verbose)
36037df2a3STejun Heo 		return 0;
37037df2a3STejun Heo 	return vfprintf(stderr, format, args);
38037df2a3STejun Heo }
39037df2a3STejun Heo 
sigint_handler(int dummy)40037df2a3STejun Heo static void sigint_handler(int dummy)
41037df2a3STejun Heo {
42037df2a3STejun Heo 	exit_req = 1;
43037df2a3STejun Heo }
44037df2a3STejun Heo 
main(int argc,char ** argv)45037df2a3STejun Heo int main(int argc, char **argv)
46037df2a3STejun Heo {
47037df2a3STejun Heo 	struct scx_central *skel;
48037df2a3STejun Heo 	struct bpf_link *link;
49*60c27fb5STejun Heo 	__u64 seq = 0, ecode;
50037df2a3STejun Heo 	__s32 opt;
5122a92020STejun Heo 	cpu_set_t *cpuset;
52037df2a3STejun Heo 
53037df2a3STejun Heo 	libbpf_set_print(libbpf_print_fn);
54037df2a3STejun Heo 	signal(SIGINT, sigint_handler);
55037df2a3STejun Heo 	signal(SIGTERM, sigint_handler);
56*60c27fb5STejun Heo restart:
57037df2a3STejun Heo 	skel = SCX_OPS_OPEN(central_ops, scx_central);
58037df2a3STejun Heo 
59037df2a3STejun Heo 	skel->rodata->central_cpu = 0;
60037df2a3STejun Heo 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
61037df2a3STejun Heo 
62037df2a3STejun Heo 	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
63037df2a3STejun Heo 		switch (opt) {
64037df2a3STejun Heo 		case 's':
65037df2a3STejun Heo 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
66037df2a3STejun Heo 			break;
67037df2a3STejun Heo 		case 'c':
68037df2a3STejun Heo 			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
69037df2a3STejun Heo 			break;
70037df2a3STejun Heo 		case 'v':
71037df2a3STejun Heo 			verbose = true;
72037df2a3STejun Heo 			break;
73037df2a3STejun Heo 		default:
74037df2a3STejun Heo 			fprintf(stderr, help_fmt, basename(argv[0]));
75037df2a3STejun Heo 			return opt != 'h';
76037df2a3STejun Heo 		}
77037df2a3STejun Heo 	}
78037df2a3STejun Heo 
79037df2a3STejun Heo 	/* Resize arrays so their element count is equal to cpu count. */
80037df2a3STejun Heo 	RESIZE_ARRAY(skel, data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
8122a92020STejun Heo 	RESIZE_ARRAY(skel, data, cpu_started_at, skel->rodata->nr_cpu_ids);
82037df2a3STejun Heo 
83037df2a3STejun Heo 	SCX_OPS_LOAD(skel, central_ops, scx_central, uei);
8422a92020STejun Heo 
8522a92020STejun Heo 	/*
8622a92020STejun Heo 	 * Affinitize the loading thread to the central CPU, as:
8722a92020STejun Heo 	 * - That's where the BPF timer is first invoked in the BPF program.
8822a92020STejun Heo 	 * - We probably don't want this user space component to take up a core
8922a92020STejun Heo 	 *   from a task that would benefit from avoiding preemption on one of
9022a92020STejun Heo 	 *   the tickless cores.
9122a92020STejun Heo 	 *
9222a92020STejun Heo 	 * Until BPF supports pinning the timer, it's not guaranteed that it
9322a92020STejun Heo 	 * will always be invoked on the central CPU. In practice, this
9422a92020STejun Heo 	 * suffices the majority of the time.
9522a92020STejun Heo 	 */
9622a92020STejun Heo 	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
9722a92020STejun Heo 	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
9822a92020STejun Heo 	CPU_ZERO(cpuset);
9922a92020STejun Heo 	CPU_SET(skel->rodata->central_cpu, cpuset);
10022a92020STejun Heo 	SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset),
10122a92020STejun Heo 		   "Failed to affinitize to central CPU %d (max %d)",
10222a92020STejun Heo 		   skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
10322a92020STejun Heo 	CPU_FREE(cpuset);
10422a92020STejun Heo 
105037df2a3STejun Heo 	link = SCX_OPS_ATTACH(skel, central_ops, scx_central);
106037df2a3STejun Heo 
10722a92020STejun Heo 	if (!skel->data->timer_pinned)
10822a92020STejun Heo 		printf("WARNING : BPF_F_TIMER_CPU_PIN not available, timer not pinned to central\n");
10922a92020STejun Heo 
110037df2a3STejun Heo 	while (!exit_req && !UEI_EXITED(skel, uei)) {
111037df2a3STejun Heo 		printf("[SEQ %llu]\n", seq++);
112037df2a3STejun Heo 		printf("total   :%10" PRIu64 "    local:%10" PRIu64 "   queued:%10" PRIu64 "  lost:%10" PRIu64 "\n",
113037df2a3STejun Heo 		       skel->bss->nr_total,
114037df2a3STejun Heo 		       skel->bss->nr_locals,
115037df2a3STejun Heo 		       skel->bss->nr_queued,
116037df2a3STejun Heo 		       skel->bss->nr_lost_pids);
11722a92020STejun Heo 		printf("timer   :%10" PRIu64 " dispatch:%10" PRIu64 " mismatch:%10" PRIu64 " retry:%10" PRIu64 "\n",
11822a92020STejun Heo 		       skel->bss->nr_timers,
119037df2a3STejun Heo 		       skel->bss->nr_dispatches,
120037df2a3STejun Heo 		       skel->bss->nr_mismatches,
121037df2a3STejun Heo 		       skel->bss->nr_retries);
122037df2a3STejun Heo 		printf("overflow:%10" PRIu64 "\n",
123037df2a3STejun Heo 		       skel->bss->nr_overflows);
124037df2a3STejun Heo 		fflush(stdout);
125037df2a3STejun Heo 		sleep(1);
126037df2a3STejun Heo 	}
127037df2a3STejun Heo 
128037df2a3STejun Heo 	bpf_link__destroy(link);
129*60c27fb5STejun Heo 	ecode = UEI_REPORT(skel, uei);
130037df2a3STejun Heo 	scx_central__destroy(skel);
131*60c27fb5STejun Heo 
132*60c27fb5STejun Heo 	if (UEI_ECODE_RESTART(ecode))
133*60c27fb5STejun Heo 		goto restart;
134037df2a3STejun Heo 	return 0;
135037df2a3STejun Heo }
136