1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 4 * Copyright (c) 2023 Tejun Heo <tj@kernel.org> 5 * Copyright (c) 2023 David Vernet <dvernet@meta.com> 6 */ 7 #include <stdio.h> 8 #include <signal.h> 9 #include <assert.h> 10 #include <unistd.h> 11 #include <libgen.h> 12 #include <limits.h> 13 #include <inttypes.h> 14 #include <fcntl.h> 15 #include <time.h> 16 #include <bpf/bpf.h> 17 #include <scx/common.h> 18 #include "scx_flatcg.h" 19 #include "scx_flatcg.bpf.skel.h" 20 21 #ifndef FILEID_KERNFS 22 #define FILEID_KERNFS 0xfe 23 #endif 24 25 const char help_fmt[] = 26 "A flattened cgroup hierarchy sched_ext scheduler.\n" 27 "\n" 28 "See the top-level comment in .bpf.c for more details.\n" 29 "\n" 30 "Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n" 31 "\n" 32 " -s SLICE_US Override slice duration\n" 33 " -i INTERVAL Report interval\n" 34 " -f Use FIFO scheduling instead of weighted vtime scheduling\n" 35 " -v Print libbpf debug messages\n" 36 " -h Display this help and exit\n"; 37 38 static bool verbose; 39 static volatile int exit_req; 40 41 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) 42 { 43 if (level == LIBBPF_DEBUG && !verbose) 44 return 0; 45 return vfprintf(stderr, format, args); 46 } 47 48 static void sigint_handler(int dummy) 49 { 50 exit_req = 1; 51 } 52 53 static float read_cpu_util(__u64 *last_sum, __u64 *last_idle) 54 { 55 FILE *fp; 56 char buf[4096]; 57 char *line, *cur = NULL, *tok; 58 __u64 sum = 0, idle = 0; 59 __u64 delta_sum, delta_idle; 60 int idx; 61 62 fp = fopen("/proc/stat", "r"); 63 if (!fp) { 64 perror("fopen(\"/proc/stat\")"); 65 return 0.0; 66 } 67 68 if (!fgets(buf, sizeof(buf), fp)) { 69 perror("fgets(\"/proc/stat\")"); 70 fclose(fp); 71 return 0.0; 72 } 73 fclose(fp); 74 75 line = buf; 76 for (idx = 0; (tok = strtok_r(line, " \n", &cur)); idx++) { 77 char *endp = NULL; 78 __u64 v; 79 80 if (idx == 0) { 81 line = NULL; 82 continue; 83 } 84 v = strtoull(tok, &endp, 0); 85 if (!endp || *endp != '\0') { 86 fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n", 87 idx, tok); 88 continue; 89 } 90 sum += v; 91 if (idx == 4) 92 idle = v; 93 } 94 95 delta_sum = sum - *last_sum; 96 delta_idle = idle - *last_idle; 97 *last_sum = sum; 98 *last_idle = idle; 99 100 return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0; 101 } 102 103 static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats) 104 { 105 __u64 *cnts; 106 __u32 idx; 107 108 cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64)); 109 if (!cnts) 110 return; 111 112 memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS); 113 114 for (idx = 0; idx < FCG_NR_STATS; idx++) { 115 int ret, cpu; 116 117 ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), 118 &idx, cnts); 119 if (ret < 0) 120 continue; 121 for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++) 122 stats[idx] += cnts[cpu]; 123 } 124 125 free(cnts); 126 } 127 128 int main(int argc, char **argv) 129 { 130 struct scx_flatcg *skel; 131 struct bpf_link *link; 132 struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 }; 133 bool dump_cgrps = false; 134 __u64 last_cpu_sum = 0, last_cpu_idle = 0; 135 __u64 last_stats[FCG_NR_STATS] = {}; 136 unsigned long seq = 0; 137 __s32 opt; 138 __u64 ecode; 139 140 libbpf_set_print(libbpf_print_fn); 141 signal(SIGINT, sigint_handler); 142 signal(SIGTERM, sigint_handler); 143 restart: 144 optind = 1; 145 skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); 146 147 skel->rodata->nr_cpus = libbpf_num_possible_cpus(); 148 assert(skel->rodata->nr_cpus > 0); 149 skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); 150 151 while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { 152 double v; 153 154 switch (opt) { 155 case 's': 156 v = strtod(optarg, NULL); 157 skel->rodata->cgrp_slice_ns = v * 1000; 158 break; 159 case 'i': 160 v = strtod(optarg, NULL); 161 intv_ts.tv_sec = v; 162 intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000; 163 break; 164 case 'd': 165 dump_cgrps = true; 166 break; 167 case 'f': 168 skel->rodata->fifo_sched = true; 169 break; 170 case 'v': 171 verbose = true; 172 break; 173 case 'h': 174 default: 175 fprintf(stderr, help_fmt, basename(argv[0])); 176 return opt != 'h'; 177 } 178 } 179 180 printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d", 181 (double)skel->rodata->cgrp_slice_ns / 1000000.0, 182 (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0, 183 dump_cgrps); 184 185 SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei); 186 link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg); 187 188 while (!exit_req && !UEI_EXITED(skel, uei)) { 189 __u64 acc_stats[FCG_NR_STATS]; 190 __u64 stats[FCG_NR_STATS]; 191 float cpu_util; 192 int i; 193 194 cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle); 195 196 fcg_read_stats(skel, acc_stats); 197 for (i = 0; i < FCG_NR_STATS; i++) 198 stats[i] = acc_stats[i] - last_stats[i]; 199 200 memcpy(last_stats, acc_stats, sizeof(acc_stats)); 201 202 printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n", 203 seq++, cpu_util * 100.0, skel->data->hweight_gen); 204 printf(" act:%6llu deact:%6llu global:%6llu local:%6llu\n", 205 stats[FCG_STAT_ACT], 206 stats[FCG_STAT_DEACT], 207 stats[FCG_STAT_GLOBAL], 208 stats[FCG_STAT_LOCAL]); 209 printf("HWT cache:%6llu update:%6llu skip:%6llu race:%6llu\n", 210 stats[FCG_STAT_HWT_CACHE], 211 stats[FCG_STAT_HWT_UPDATES], 212 stats[FCG_STAT_HWT_SKIP], 213 stats[FCG_STAT_HWT_RACE]); 214 printf("ENQ skip:%6llu race:%6llu\n", 215 stats[FCG_STAT_ENQ_SKIP], 216 stats[FCG_STAT_ENQ_RACE]); 217 printf("CNS keep:%6llu expire:%6llu empty:%6llu gone:%6llu\n", 218 stats[FCG_STAT_CNS_KEEP], 219 stats[FCG_STAT_CNS_EXPIRE], 220 stats[FCG_STAT_CNS_EMPTY], 221 stats[FCG_STAT_CNS_GONE]); 222 printf("PNC next:%6llu empty:%6llu nocgrp:%6llu gone:%6llu race:%6llu fail:%6llu\n", 223 stats[FCG_STAT_PNC_NEXT], 224 stats[FCG_STAT_PNC_EMPTY], 225 stats[FCG_STAT_PNC_NO_CGRP], 226 stats[FCG_STAT_PNC_GONE], 227 stats[FCG_STAT_PNC_RACE], 228 stats[FCG_STAT_PNC_FAIL]); 229 printf("BAD remove:%6llu\n", 230 acc_stats[FCG_STAT_BAD_REMOVAL]); 231 fflush(stdout); 232 233 nanosleep(&intv_ts, NULL); 234 } 235 236 bpf_link__destroy(link); 237 ecode = UEI_REPORT(skel, uei); 238 scx_flatcg__destroy(skel); 239 240 if (UEI_ECODE_RESTART(ecode)) 241 goto restart; 242 return 0; 243 } 244