1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
4 * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
5 * Copyright (c) 2023 David Vernet <dvernet@meta.com>
6 */
7 #include <stdio.h>
8 #include <signal.h>
9 #include <assert.h>
10 #include <unistd.h>
11 #include <libgen.h>
12 #include <limits.h>
13 #include <inttypes.h>
14 #include <fcntl.h>
15 #include <time.h>
16 #include <bpf/bpf.h>
17 #include <scx/common.h>
18 #include "scx_flatcg.h"
19 #include "scx_flatcg.bpf.skel.h"
20
/*
 * Fallback definition used only when none of the headers included above
 * already provide FILEID_KERNFS.
 *
 * NOTE(review): nothing in the visible part of this file references
 * FILEID_KERNFS - confirm it is still needed.
 */
#ifndef FILEID_KERNFS
#define FILEID_KERNFS 0xfe
#endif
24
/*
 * Usage text printed to stderr by main() for -h or an unrecognized option.
 * The single %s conversion receives the basename of argv[0].
 *
 * NOTE(review): main() passes "s:i:dfvh" to getopt(), so -d is accepted but
 * is not listed below - confirm whether it should be documented or removed.
 */
const char help_fmt[] =
"A flattened cgroup hierarchy sched_ext scheduler.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -i INTERVAL Report interval\n"
" -f Use FIFO scheduling instead of weighted vtime scheduling\n"
" -v Print libbpf debug messages\n"
" -h Display this help and exit\n";
37
/* Set by -v; when false, libbpf debug-level messages are suppressed. */
static bool verbose;

/*
 * Set to 1 by the SIGINT/SIGTERM handler and polled by the main loop.
 * volatile sig_atomic_t is the only plain object type ISO C guarantees can be
 * written from an asynchronous signal handler with defined behaviour.
 */
static volatile sig_atomic_t exit_req;
40
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)41 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
42 {
43 if (level == LIBBPF_DEBUG && !verbose)
44 return 0;
45 return vfprintf(stderr, format, args);
46 }
47
sigint_handler(int dummy)48 static void sigint_handler(int dummy)
49 {
50 exit_req = 1;
51 }
52
/*
 * Compute the non-idle share of CPU time accumulated since the previous call.
 *
 * Reads the first line of /proc/stat, skips its leading label token and adds
 * up every numeric field that follows; the 4th numeric field is taken as the
 * idle counter. Fields that fail to parse are reported on stderr and left out
 * of the sum.
 *
 * @last_sum:  in/out - total from the previous call, updated to the new total
 * @last_idle: in/out - idle value from the previous call, updated likewise
 *
 * Returns (delta_total - delta_idle) / delta_total, or 0.0 when the total did
 * not move or /proc/stat could not be opened or read (in which case the
 * in/out values are left untouched).
 */
static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
{
	char buf[4096];
	char *save = NULL, *field;
	__u64 total = 0, idle_now = 0;
	__u64 d_total, d_idle;
	FILE *stat_fp;
	int nr;

	stat_fp = fopen("/proc/stat", "r");
	if (!stat_fp) {
		perror("fopen(\"/proc/stat\")");
		return 0.0;
	}

	if (!fgets(buf, sizeof(buf), stat_fp)) {
		perror("fgets(\"/proc/stat\")");
		fclose(stat_fp);
		return 0.0;
	}
	fclose(stat_fp);

	/* Token 0 is the row label and carries no counter; consume it first. */
	if (strtok_r(buf, " \n", &save)) {
		for (nr = 1; (field = strtok_r(NULL, " \n", &save)); nr++) {
			char *end = NULL;
			__u64 val = strtoull(field, &end, 0);

			if (end && *end == '\0') {
				total += val;
				if (nr == 4)
					idle_now = val;
			} else {
				fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n",
					nr, field);
			}
		}
	}

	d_total = total - *last_sum;
	d_idle = idle_now - *last_idle;
	*last_sum = total;
	*last_idle = idle_now;

	if (!d_total)
		return 0.0;
	return (float)(d_total - d_idle) / d_total;
}
102
/*
 * Read every counter from the skeleton's "stats" map and sum it across CPUs.
 *
 * @skel:  skeleton whose rodata->nr_cpus gives the number of per-lookup
 *         values and whose maps.stats is the map being read
 * @stats: output array with room for FCG_NR_STATS entries
 *
 * @stats is always fully written. An entry is 0 when its lookup fails, and
 * all entries are 0 when the scratch buffer cannot be allocated, so the
 * caller never sees indeterminate values.
 */
static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
{
	__u64 *cnts;
	__u32 idx;
	int map_fd;

	/*
	 * Zero the output before anything can fail: the caller passes an
	 * uninitialized stack array and consumes it unconditionally.
	 */
	memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);

	/* One lookup fills nr_cpus values, so the scratch buffer is sized to match. */
	cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64));
	if (!cnts)
		return;

	map_fd = bpf_map__fd(skel->maps.stats);

	for (idx = 0; idx < FCG_NR_STATS; idx++) {
		int ret, cpu;

		ret = bpf_map_lookup_elem(map_fd, &idx, cnts);
		if (ret < 0)
			continue;	/* best effort: this counter stays 0 */
		for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
			stats[idx] += cnts[cpu];
	}

	free(cnts);
}
127
main(int argc,char ** argv)128 int main(int argc, char **argv)
129 {
130 struct scx_flatcg *skel;
131 struct bpf_link *link;
132 struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 };
133 bool dump_cgrps = false;
134 __u64 last_cpu_sum = 0, last_cpu_idle = 0;
135 __u64 last_stats[FCG_NR_STATS] = {};
136 unsigned long seq = 0;
137 __s32 opt;
138 __u64 ecode;
139
140 libbpf_set_print(libbpf_print_fn);
141 signal(SIGINT, sigint_handler);
142 signal(SIGTERM, sigint_handler);
143 restart:
144 optind = 1;
145 skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
146
147 skel->rodata->nr_cpus = libbpf_num_possible_cpus();
148 assert(skel->rodata->nr_cpus > 0);
149 skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
150
151 while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
152 double v;
153
154 switch (opt) {
155 case 's':
156 v = strtod(optarg, NULL);
157 skel->rodata->cgrp_slice_ns = v * 1000;
158 break;
159 case 'i':
160 v = strtod(optarg, NULL);
161 intv_ts.tv_sec = v;
162 intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000;
163 break;
164 case 'd':
165 dump_cgrps = true;
166 break;
167 case 'f':
168 skel->rodata->fifo_sched = true;
169 break;
170 case 'v':
171 verbose = true;
172 break;
173 case 'h':
174 default:
175 fprintf(stderr, help_fmt, basename(argv[0]));
176 return opt != 'h';
177 }
178 }
179
180 printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d",
181 (double)skel->rodata->cgrp_slice_ns / 1000000.0,
182 (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
183 dump_cgrps);
184
185 SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
186 link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg);
187
188 while (!exit_req && !UEI_EXITED(skel, uei)) {
189 __u64 acc_stats[FCG_NR_STATS];
190 __u64 stats[FCG_NR_STATS];
191 float cpu_util;
192 int i;
193
194 cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle);
195
196 fcg_read_stats(skel, acc_stats);
197 for (i = 0; i < FCG_NR_STATS; i++)
198 stats[i] = acc_stats[i] - last_stats[i];
199
200 memcpy(last_stats, acc_stats, sizeof(acc_stats));
201
202 printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n",
203 seq++, cpu_util * 100.0, skel->data->hweight_gen);
204 printf(" act:%6llu deact:%6llu global:%6llu local:%6llu\n",
205 stats[FCG_STAT_ACT],
206 stats[FCG_STAT_DEACT],
207 stats[FCG_STAT_GLOBAL],
208 stats[FCG_STAT_LOCAL]);
209 printf("HWT cache:%6llu update:%6llu skip:%6llu race:%6llu\n",
210 stats[FCG_STAT_HWT_CACHE],
211 stats[FCG_STAT_HWT_UPDATES],
212 stats[FCG_STAT_HWT_SKIP],
213 stats[FCG_STAT_HWT_RACE]);
214 printf("ENQ skip:%6llu race:%6llu\n",
215 stats[FCG_STAT_ENQ_SKIP],
216 stats[FCG_STAT_ENQ_RACE]);
217 printf("CNS keep:%6llu expire:%6llu empty:%6llu gone:%6llu\n",
218 stats[FCG_STAT_CNS_KEEP],
219 stats[FCG_STAT_CNS_EXPIRE],
220 stats[FCG_STAT_CNS_EMPTY],
221 stats[FCG_STAT_CNS_GONE]);
222 printf("PNC next:%6llu empty:%6llu nocgrp:%6llu gone:%6llu race:%6llu fail:%6llu\n",
223 stats[FCG_STAT_PNC_NEXT],
224 stats[FCG_STAT_PNC_EMPTY],
225 stats[FCG_STAT_PNC_NO_CGRP],
226 stats[FCG_STAT_PNC_GONE],
227 stats[FCG_STAT_PNC_RACE],
228 stats[FCG_STAT_PNC_FAIL]);
229 printf("BAD remove:%6llu\n",
230 acc_stats[FCG_STAT_BAD_REMOVAL]);
231 fflush(stdout);
232
233 nanosleep(&intv_ts, NULL);
234 }
235
236 bpf_link__destroy(link);
237 ecode = UEI_REPORT(skel, uei);
238 scx_flatcg__destroy(skel);
239
240 if (UEI_ECODE_RESTART(ecode))
241 goto restart;
242 return 0;
243 }
244