xref: /linux/tools/sched_ext/scx_flatcg.c (revision a23cd25baed2316e50597f8b67192bdc904f955b)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
4  * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
5  * Copyright (c) 2023 David Vernet <dvernet@meta.com>
6  */
7 #include <stdio.h>
8 #include <signal.h>
9 #include <assert.h>
10 #include <unistd.h>
11 #include <libgen.h>
12 #include <limits.h>
13 #include <inttypes.h>
14 #include <fcntl.h>
15 #include <time.h>
16 #include <bpf/bpf.h>
17 #include <scx/common.h>
18 #include "scx_flatcg.h"
19 #include "scx_flatcg.bpf.skel.h"
20 
21 #ifndef FILEID_KERNFS
22 #define FILEID_KERNFS		0xfe
23 #endif
24 
/*
 * Usage text printed for -h and for unrecognized options. The single %s
 * conversion is filled with the program's basename by main(). Every option
 * accepted by main()'s getopt() string ("s:i:dfvh") is listed here.
 */
const char help_fmt[] =
"A flattened cgroup hierarchy sched_ext scheduler.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-d] [-f] [-v] [-h]\n"
"\n"
"  -s SLICE_US   Override slice duration\n"
"  -i INTERVAL   Report interval\n"
"  -d            Set the dump_cgrps flag shown in the startup banner\n"
"  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
"  -v            Print libbpf debug messages\n"
"  -h            Display this help and exit\n";
37 
/* Set by -v; while false, libbpf_print_fn() drops LIBBPF_DEBUG messages. */
static bool verbose;

/*
 * Raised by the SIGINT/SIGTERM handler and polled by the main reporting loop.
 * volatile sig_atomic_t is the plain object type ISO C guarantees can be
 * written from an asynchronous signal handler.
 */
static volatile sig_atomic_t exit_req;
40 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)41 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
42 {
43 	if (level == LIBBPF_DEBUG && !verbose)
44 		return 0;
45 	return vfprintf(stderr, format, args);
46 }
47 
sigint_handler(int dummy)48 static void sigint_handler(int dummy)
49 {
50 	exit_req = 1;
51 }
52 
/*
 * Sample CPU utilization from the first line of /proc/stat.
 *
 * @last_sum:  in/out - sum of all numeric fields of that line as seen by the
 *             previous call; updated to the current sum
 * @last_idle: in/out - 4th numeric field (idle) as seen by the previous call;
 *             updated to the current value
 *
 * Returns the busy fraction accumulated since the previous call,
 * (delta_sum - delta_idle) / delta_sum, always within 0.0 - 1.0. With zeroed
 * @last_sum / @last_idle the result covers everything the counters have
 * accumulated so far. Returns 0.0 if /proc/stat cannot be opened or read, or
 * if the sum did not advance.
 */
static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
{
	FILE *fp;
	char buf[4096];
	char *line, *cur = NULL, *tok;
	__u64 sum = 0, idle = 0;
	__u64 delta_sum, delta_idle;
	int idx;

	fp = fopen("/proc/stat", "r");
	if (!fp) {
		perror("fopen(\"/proc/stat\")");
		return 0.0;
	}

	if (!fgets(buf, sizeof(buf), fp)) {
		perror("fgets(\"/proc/stat\")");
		fclose(fp);
		return 0.0;
	}
	fclose(fp);

	line = buf;
	for (idx = 0; (tok = strtok_r(line, " \n", &cur)); idx++) {
		char *endp = NULL;
		__u64 v;

		/* Token 0 is the row label, not a counter. */
		if (idx == 0) {
			line = NULL;
			continue;
		}

		/* The counters are plain decimal; don't auto-detect octal/hex. */
		v = strtoull(tok, &endp, 10);
		if (endp == tok || *endp != '\0') {
			fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n",
				idx, tok);
			continue;
		}
		sum += v;
		if (idx == 4)
			idle = v;
	}

	/*
	 * Guard the unsigned subtractions: if a counter is lower than the
	 * remembered value (or the caller passed stale values), a wrapped
	 * delta would turn into a ratio far outside 0..1. Treat a regression
	 * as "no progress" and never let idle time exceed total time.
	 */
	delta_sum = sum >= *last_sum ? sum - *last_sum : 0;
	delta_idle = idle >= *last_idle ? idle - *last_idle : 0;
	if (delta_idle > delta_sum)
		delta_idle = delta_sum;

	*last_sum = sum;
	*last_idle = idle;

	return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0;
}
102 
/*
 * Collect the scheduler's statistics counters from the BPF "stats" map.
 *
 * @skel:  skeleton supplying the stats map and rodata->nr_cpus
 * @stats: output array of FCG_NR_STATS entries; stats[i] receives the sum of
 *         the nr_cpus values the lookup of key i returns
 *
 * A key whose lookup fails is left at zero.
 */
static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
{
	/*
	 * A single row of nr_cpus values is enough: each lookup result is
	 * folded into stats[] before the next lookup overwrites the buffer.
	 * This keeps the on-stack VLA FCG_NR_STATS times smaller than a full
	 * per-stat matrix.
	 * NOTE(review): assumes each lookup fills exactly nr_cpus __u64 slots
	 * (per-CPU map with an 8-byte value) - confirm against the .bpf.c map
	 * definition.
	 */
	__u64 cnts[skel->rodata->nr_cpus];
	int map_fd = bpf_map__fd(skel->maps.stats);
	__u32 idx;

	memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);

	for (idx = 0; idx < FCG_NR_STATS; idx++) {
		int ret, cpu;

		ret = bpf_map_lookup_elem(map_fd, &idx, cnts);
		if (ret < 0)
			continue;
		for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
			stats[idx] += cnts[cpu];
	}
}
121 
main(int argc,char ** argv)122 int main(int argc, char **argv)
123 {
124 	struct scx_flatcg *skel;
125 	struct bpf_link *link;
126 	struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 };
127 	bool dump_cgrps = false;
128 	__u64 last_cpu_sum = 0, last_cpu_idle = 0;
129 	__u64 last_stats[FCG_NR_STATS] = {};
130 	unsigned long seq = 0;
131 	__s32 opt;
132 	__u64 ecode;
133 
134 	libbpf_set_print(libbpf_print_fn);
135 	signal(SIGINT, sigint_handler);
136 	signal(SIGTERM, sigint_handler);
137 restart:
138 	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
139 
140 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
141 	assert(skel->rodata->nr_cpus > 0);
142 	skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
143 
144 	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
145 		double v;
146 
147 		switch (opt) {
148 		case 's':
149 			v = strtod(optarg, NULL);
150 			skel->rodata->cgrp_slice_ns = v * 1000;
151 			break;
152 		case 'i':
153 			v = strtod(optarg, NULL);
154 			intv_ts.tv_sec = v;
155 			intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000;
156 			break;
157 		case 'd':
158 			dump_cgrps = true;
159 			break;
160 		case 'f':
161 			skel->rodata->fifo_sched = true;
162 			break;
163 		case 'v':
164 			verbose = true;
165 			break;
166 		case 'h':
167 		default:
168 			fprintf(stderr, help_fmt, basename(argv[0]));
169 			return opt != 'h';
170 		}
171 	}
172 
173 	printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d",
174 	       (double)skel->rodata->cgrp_slice_ns / 1000000.0,
175 	       (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
176 	       dump_cgrps);
177 
178 	SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
179 	link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg);
180 
181 	while (!exit_req && !UEI_EXITED(skel, uei)) {
182 		__u64 acc_stats[FCG_NR_STATS];
183 		__u64 stats[FCG_NR_STATS];
184 		float cpu_util;
185 		int i;
186 
187 		cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle);
188 
189 		fcg_read_stats(skel, acc_stats);
190 		for (i = 0; i < FCG_NR_STATS; i++)
191 			stats[i] = acc_stats[i] - last_stats[i];
192 
193 		memcpy(last_stats, acc_stats, sizeof(acc_stats));
194 
195 		printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n",
196 		       seq++, cpu_util * 100.0, skel->data->hweight_gen);
197 		printf("       act:%6llu  deact:%6llu global:%6llu local:%6llu\n",
198 		       stats[FCG_STAT_ACT],
199 		       stats[FCG_STAT_DEACT],
200 		       stats[FCG_STAT_GLOBAL],
201 		       stats[FCG_STAT_LOCAL]);
202 		printf("HWT  cache:%6llu update:%6llu   skip:%6llu  race:%6llu\n",
203 		       stats[FCG_STAT_HWT_CACHE],
204 		       stats[FCG_STAT_HWT_UPDATES],
205 		       stats[FCG_STAT_HWT_SKIP],
206 		       stats[FCG_STAT_HWT_RACE]);
207 		printf("ENQ   skip:%6llu   race:%6llu\n",
208 		       stats[FCG_STAT_ENQ_SKIP],
209 		       stats[FCG_STAT_ENQ_RACE]);
210 		printf("CNS   keep:%6llu expire:%6llu  empty:%6llu  gone:%6llu\n",
211 		       stats[FCG_STAT_CNS_KEEP],
212 		       stats[FCG_STAT_CNS_EXPIRE],
213 		       stats[FCG_STAT_CNS_EMPTY],
214 		       stats[FCG_STAT_CNS_GONE]);
215 		printf("PNC   next:%6llu  empty:%6llu nocgrp:%6llu  gone:%6llu race:%6llu fail:%6llu\n",
216 		       stats[FCG_STAT_PNC_NEXT],
217 		       stats[FCG_STAT_PNC_EMPTY],
218 		       stats[FCG_STAT_PNC_NO_CGRP],
219 		       stats[FCG_STAT_PNC_GONE],
220 		       stats[FCG_STAT_PNC_RACE],
221 		       stats[FCG_STAT_PNC_FAIL]);
222 		printf("BAD remove:%6llu\n",
223 		       acc_stats[FCG_STAT_BAD_REMOVAL]);
224 		fflush(stdout);
225 
226 		nanosleep(&intv_ts, NULL);
227 	}
228 
229 	bpf_link__destroy(link);
230 	ecode = UEI_REPORT(skel, uei);
231 	scx_flatcg__destroy(skel);
232 
233 	if (UEI_ECODE_RESTART(ecode))
234 		goto restart;
235 	return 0;
236 }
237