1c5350777SLeo Yan // SPDX-License-Identifier: GPL-2.0
2c5350777SLeo Yan
3c5350777SLeo Yan #define _GNU_SOURCE
4c5350777SLeo Yan #include <errno.h>
5c5350777SLeo Yan #include <stdio.h>
6c5350777SLeo Yan #include <stdlib.h>
7c5350777SLeo Yan #include <signal.h>
8c5350777SLeo Yan #include <sched.h>
9c5350777SLeo Yan #include <string.h>
10c5350777SLeo Yan #include <unistd.h>
11c5350777SLeo Yan #include <fcntl.h>
12c5350777SLeo Yan #include <locale.h>
13c5350777SLeo Yan #include <sys/types.h>
14c5350777SLeo Yan #include <sys/stat.h>
15c5350777SLeo Yan #include <sys/time.h>
16c5350777SLeo Yan #include <sys/wait.h>
17c5350777SLeo Yan
182bf3e2efSJakub Kicinski #include <bpf/bpf.h>
19f0c328f8SDaniel T. Lee #include <bpf/libbpf.h>
20f0c328f8SDaniel T. Lee
/*
 * File descriptors of the two BPF maps read by this program.  main() fills
 * them in from the maps named "cstate_duration" and "pstate_duration".
 */
static int cstate_map_fd;
static int pstate_map_fd;

/* Dimensions of the user-space statistics table. */
#define MAX_CPU			8
#define MAX_PSTATE_ENTRIES	5
#define MAX_CSTATE_ENTRIES	3
#define MAX_STARS		40

/*
 * cpufreq sysfs knob of CPU0 and the two values written to it in order to
 * provoke a frequency change.
 * NOTE(review): the values look like kHz (208000 matches the "208MHz" in the
 * comment on cpu_stat_inject_cpu_frequency_event()); if so, "12000000" is
 * larger than the 1.2GHz that comment mentions - confirm this is intended.
 */
#define CPUFREQ_MAX_SYSFS_PATH	"/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
#define CPUFREQ_LOWEST_FREQ	"208000"
#define CPUFREQ_HIGHEST_FREQ	"12000000"

/* One CPU's worth of counters: one slot per idle state and per frequency. */
struct cpu_stat_data {
	unsigned long cstate[MAX_CSTATE_ENTRIES];
	unsigned long pstate[MAX_PSTATE_ENTRIES];
};

/* Latest snapshot, indexed by CPU number; zero until the first update. */
static struct cpu_stat_data stat_data[MAX_CPU];
38c5350777SLeo Yan
cpu_stat_print(void)39c5350777SLeo Yan static void cpu_stat_print(void)
40c5350777SLeo Yan {
41c5350777SLeo Yan int i, j;
42c5350777SLeo Yan char state_str[sizeof("cstate-9")];
43c5350777SLeo Yan struct cpu_stat_data *data;
44c5350777SLeo Yan
45c5350777SLeo Yan /* Clear screen */
46c5350777SLeo Yan printf("\033[2J");
47c5350777SLeo Yan
48c5350777SLeo Yan /* Header */
49c5350777SLeo Yan printf("\nCPU states statistics:\n");
50c5350777SLeo Yan printf("%-10s ", "state(ms)");
51c5350777SLeo Yan
52c5350777SLeo Yan for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
53c5350777SLeo Yan sprintf(state_str, "cstate-%d", i);
54c5350777SLeo Yan printf("%-11s ", state_str);
55c5350777SLeo Yan }
56c5350777SLeo Yan
57c5350777SLeo Yan for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
58c5350777SLeo Yan sprintf(state_str, "pstate-%d", i);
59c5350777SLeo Yan printf("%-11s ", state_str);
60c5350777SLeo Yan }
61c5350777SLeo Yan
62c5350777SLeo Yan printf("\n");
63c5350777SLeo Yan
64c5350777SLeo Yan for (j = 0; j < MAX_CPU; j++) {
65c5350777SLeo Yan data = &stat_data[j];
66c5350777SLeo Yan
67c5350777SLeo Yan printf("CPU-%-6d ", j);
68c5350777SLeo Yan for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
69*32f24938SColin Ian King printf("%-11lu ", data->cstate[i] / 1000000);
70c5350777SLeo Yan
71c5350777SLeo Yan for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
72*32f24938SColin Ian King printf("%-11lu ", data->pstate[i] / 1000000);
73c5350777SLeo Yan
74c5350777SLeo Yan printf("\n");
75c5350777SLeo Yan }
76c5350777SLeo Yan }
77c5350777SLeo Yan
cpu_stat_update(int cstate_fd,int pstate_fd)78c5350777SLeo Yan static void cpu_stat_update(int cstate_fd, int pstate_fd)
79c5350777SLeo Yan {
80c5350777SLeo Yan unsigned long key, value;
81c5350777SLeo Yan int c, i;
82c5350777SLeo Yan
83c5350777SLeo Yan for (c = 0; c < MAX_CPU; c++) {
84c5350777SLeo Yan for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
85c5350777SLeo Yan key = c * MAX_CSTATE_ENTRIES + i;
86c5350777SLeo Yan bpf_map_lookup_elem(cstate_fd, &key, &value);
87c5350777SLeo Yan stat_data[c].cstate[i] = value;
88c5350777SLeo Yan }
89c5350777SLeo Yan
90c5350777SLeo Yan for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
91c5350777SLeo Yan key = c * MAX_PSTATE_ENTRIES + i;
92c5350777SLeo Yan bpf_map_lookup_elem(pstate_fd, &key, &value);
93c5350777SLeo Yan stat_data[c].pstate[i] = value;
94c5350777SLeo Yan }
95c5350777SLeo Yan }
96c5350777SLeo Yan }
97c5350777SLeo Yan
/*
 * This function is copied from the 'idlestat' tool, function
 * idlestat_wake_all() in idlestate.c.
 *
 * It pins the calling task to each allowed CPU in turn, so that every such
 * CPU is woken up once to handle scheduling and produces the ftrace event
 * 'trace_cpu_idle'.
 *
 * Returns 0 on success, or -1 if the CPU count, the current CPU or the
 * current affinity mask cannot be obtained.  The individual affinity
 * changes, including the final restore, are best effort and not checked.
 */
static int cpu_stat_inject_cpu_idle_event(void)
{
	cpu_set_t original_cpumask;
	cpu_set_t cpumask;
	long ncpus;
	int rcpu, i;

	ncpus = sysconf(_SC_NPROCESSORS_CONF);
	if (ncpus < 0)
		return -1;

	rcpu = sched_getcpu();
	if (rcpu < 0)
		return -1;

	/*
	 * Keep track of the CPUs we are allowed to run on.  Bail out if the
	 * mask cannot be read: it is consulted below and used to restore
	 * the affinity, so it must not be left uninitialized.
	 */
	if (sched_getaffinity(0, sizeof(original_cpumask),
			      &original_cpumask) < 0)
		return -1;

	for (i = 0; i < ncpus; i++) {

		/* Pointless to wake up ourself */
		if (i == rcpu)
			continue;

		/* Pointless to wake CPUs we will not run on */
		if (!CPU_ISSET(i, &original_cpumask))
			continue;

		CPU_ZERO(&cpumask);
		CPU_SET(i, &cpumask);

		sched_setaffinity(0, sizeof(cpumask), &cpumask);
	}

	/* Enable all the CPUs of the original mask again */
	sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
	return 0;
}
143c5350777SLeo Yan
144c5350777SLeo Yan /*
145c5350777SLeo Yan * It's possible to have no any frequency change for long time and cannot
146c5350777SLeo Yan * get ftrace event 'trace_cpu_frequency' for long period, this introduces
147c5350777SLeo Yan * big deviation for pstate statistics.
148c5350777SLeo Yan *
149c5350777SLeo Yan * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
150c5350777SLeo Yan * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
151c5350777SLeo Yan * the maximum frequency value 1.2GHz.
152c5350777SLeo Yan */
cpu_stat_inject_cpu_frequency_event(void)153c5350777SLeo Yan static int cpu_stat_inject_cpu_frequency_event(void)
154c5350777SLeo Yan {
155c5350777SLeo Yan int len, fd;
156c5350777SLeo Yan
157c5350777SLeo Yan fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
158c5350777SLeo Yan if (fd < 0) {
159c5350777SLeo Yan printf("failed to open scaling_max_freq, errno=%d\n", errno);
160c5350777SLeo Yan return fd;
161c5350777SLeo Yan }
162c5350777SLeo Yan
163c5350777SLeo Yan len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
164c5350777SLeo Yan if (len < 0) {
165c5350777SLeo Yan printf("failed to open scaling_max_freq, errno=%d\n", errno);
166c5350777SLeo Yan goto err;
167c5350777SLeo Yan }
168c5350777SLeo Yan
169c5350777SLeo Yan len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
170c5350777SLeo Yan if (len < 0) {
171c5350777SLeo Yan printf("failed to open scaling_max_freq, errno=%d\n", errno);
172c5350777SLeo Yan goto err;
173c5350777SLeo Yan }
174c5350777SLeo Yan
175c5350777SLeo Yan err:
176c5350777SLeo Yan close(fd);
177c5350777SLeo Yan return len;
178c5350777SLeo Yan }
179c5350777SLeo Yan
int_exit(int sig)180c5350777SLeo Yan static void int_exit(int sig)
181c5350777SLeo Yan {
182c5350777SLeo Yan cpu_stat_inject_cpu_idle_event();
183c5350777SLeo Yan cpu_stat_inject_cpu_frequency_event();
184f0c328f8SDaniel T. Lee cpu_stat_update(cstate_map_fd, pstate_map_fd);
185c5350777SLeo Yan cpu_stat_print();
186c5350777SLeo Yan exit(0);
187c5350777SLeo Yan }
188c5350777SLeo Yan
main(int argc,char ** argv)189c5350777SLeo Yan int main(int argc, char **argv)
190c5350777SLeo Yan {
191f0c328f8SDaniel T. Lee struct bpf_link *link = NULL;
192f0c328f8SDaniel T. Lee struct bpf_program *prog;
193f0c328f8SDaniel T. Lee struct bpf_object *obj;
194c5350777SLeo Yan char filename[256];
195c5350777SLeo Yan int ret;
196c5350777SLeo Yan
197c5350777SLeo Yan snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
198f0c328f8SDaniel T. Lee obj = bpf_object__open_file(filename, NULL);
199f0c328f8SDaniel T. Lee if (libbpf_get_error(obj)) {
200f0c328f8SDaniel T. Lee fprintf(stderr, "ERROR: opening BPF object file failed\n");
201f0c328f8SDaniel T. Lee return 0;
202f0c328f8SDaniel T. Lee }
203c5350777SLeo Yan
204f0c328f8SDaniel T. Lee prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
205f0c328f8SDaniel T. Lee if (!prog) {
206f0c328f8SDaniel T. Lee printf("finding a prog in obj file failed\n");
207f0c328f8SDaniel T. Lee goto cleanup;
208f0c328f8SDaniel T. Lee }
209f0c328f8SDaniel T. Lee
210f0c328f8SDaniel T. Lee /* load BPF program */
211f0c328f8SDaniel T. Lee if (bpf_object__load(obj)) {
212f0c328f8SDaniel T. Lee fprintf(stderr, "ERROR: loading BPF object file failed\n");
213f0c328f8SDaniel T. Lee goto cleanup;
214f0c328f8SDaniel T. Lee }
215f0c328f8SDaniel T. Lee
216f0c328f8SDaniel T. Lee cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
217f0c328f8SDaniel T. Lee pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
218f0c328f8SDaniel T. Lee if (cstate_map_fd < 0 || pstate_map_fd < 0) {
219f0c328f8SDaniel T. Lee fprintf(stderr, "ERROR: finding a map in obj file failed\n");
220f0c328f8SDaniel T. Lee goto cleanup;
221f0c328f8SDaniel T. Lee }
222f0c328f8SDaniel T. Lee
223f0c328f8SDaniel T. Lee link = bpf_program__attach(prog);
224f0c328f8SDaniel T. Lee if (libbpf_get_error(link)) {
225f0c328f8SDaniel T. Lee fprintf(stderr, "ERROR: bpf_program__attach failed\n");
226f0c328f8SDaniel T. Lee link = NULL;
227f0c328f8SDaniel T. Lee goto cleanup;
228c5350777SLeo Yan }
229c5350777SLeo Yan
230c5350777SLeo Yan ret = cpu_stat_inject_cpu_idle_event();
231c5350777SLeo Yan if (ret < 0)
232c5350777SLeo Yan return 1;
233c5350777SLeo Yan
234c5350777SLeo Yan ret = cpu_stat_inject_cpu_frequency_event();
235c5350777SLeo Yan if (ret < 0)
236c5350777SLeo Yan return 1;
237c5350777SLeo Yan
238c5350777SLeo Yan signal(SIGINT, int_exit);
239c5350777SLeo Yan signal(SIGTERM, int_exit);
240c5350777SLeo Yan
241c5350777SLeo Yan while (1) {
242f0c328f8SDaniel T. Lee cpu_stat_update(cstate_map_fd, pstate_map_fd);
243c5350777SLeo Yan cpu_stat_print();
244c5350777SLeo Yan sleep(5);
245c5350777SLeo Yan }
246c5350777SLeo Yan
247f0c328f8SDaniel T. Lee cleanup:
248f0c328f8SDaniel T. Lee bpf_link__destroy(link);
249f0c328f8SDaniel T. Lee bpf_object__close(obj);
250c5350777SLeo Yan return 0;
251c5350777SLeo Yan }
252