1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * dlfilter-show-cycles.c: Print the number of cycles at the start of each line
4 * Copyright (c) 2021, Intel Corporation.
5 */
6 #include <perf/perf_dlfilter.h>
7 #include <string.h>
8 #include <stdio.h>
9
10 #define MAX_CPU 4096
11
12 enum {
13 INSTR_CYC,
14 BRNCH_CYC,
15 OTHER_CYC,
16 MAX_ENTRY
17 };
18
19 static __u64 cycles[MAX_CPU][MAX_ENTRY];
20 static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];
21
22 #define BITS 16
23 #define TABLESZ (1 << BITS)
24 #define TABLEMAX (TABLESZ / 2)
25 #define MASK (TABLESZ - 1)
26
27 static struct entry {
28 __u32 used;
29 __s32 tid;
30 __u64 cycles[MAX_ENTRY];
31 __u64 cycles_rpt[MAX_ENTRY];
32 } table[TABLESZ];
33
34 static int tid_cnt;
35
event_entry(const char * event)36 static int event_entry(const char *event)
37 {
38 if (!event)
39 return OTHER_CYC;
40 if (!strncmp(event, "instructions", 12))
41 return INSTR_CYC;
42 if (!strncmp(event, "branches", 8))
43 return BRNCH_CYC;
44 return OTHER_CYC;
45 }
46
/*
 * find_entry - look up the table slot for a thread, creating it if necessary
 * @tid: thread id
 *
 * Slots are probed one after another starting at the low BITS bits of @tid,
 * wrapping round at the end of the table. At most TABLEMAX slots are ever
 * marked used, so the probe always reaches an unused slot and terminates.
 *
 * Return: the slot for @tid, or NULL (after printing a message to stderr) if
 * @tid has no slot yet and TABLEMAX slots are already in use.
 */
static struct entry *find_entry(__s32 tid)
{
	struct entry *slot;
	__u32 idx;

	/* TABLESZ is a power of two, so masking wraps the index back to 0 */
	for (idx = tid & MASK; table[idx].used; idx = (idx + 1) & MASK) {
		if (table[idx].tid == tid)
			return &table[idx];
	}

	if (tid_cnt >= TABLEMAX) {
		fprintf(stderr, "Too many threads\n");
		return NULL;
	}

	/* Not found: claim the unused slot that ended the probe */
	slot = &table[idx];
	slot->used = 1;
	slot->tid = tid;
	tid_cnt++;

	return slot;
}
71
/*
 * add_entry - add cycles to one of a thread's counts
 * @tid: thread id
 * @pos: which count to add to (INSTR_CYC, BRNCH_CYC or OTHER_CYC)
 * @cnt: number of cycles to add
 *
 * Nothing is added if find_entry() cannot provide a slot for @tid.
 */
static void add_entry(__s32 tid, int pos, __u64 cnt)
{
	struct entry *slot = find_entry(tid);

	if (!slot)
		return;

	slot->cycles[pos] += cnt;
}
79
filter_event_early(void * data,const struct perf_dlfilter_sample * sample,void * ctx)80 int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
81 {
82 __s32 cpu = sample->cpu;
83 __s32 tid = sample->tid;
84 int pos;
85
86 if (!sample->cyc_cnt)
87 return 0;
88
89 pos = event_entry(sample->event);
90
91 if (cpu >= 0 && cpu < MAX_CPU)
92 cycles[cpu][pos] += sample->cyc_cnt;
93 else if (tid != -1)
94 add_entry(tid, pos, sample->cyc_cnt);
95 return 0;
96 }
97
/*
 * print_vals - print a cycle count and its change since the last print
 * @cycles: accumulated cycle count
 * @delta: change since the last print
 *
 * Each value is right-aligned in a 10-character column and followed by a
 * space. A zero @delta is printed as blanks so that the columns stay aligned.
 */
static void print_vals(__u64 cycles, __u64 delta)
{
	unsigned long long total = (unsigned long long)cycles;

	if (!delta) {
		printf("%10llu %10s ", total, "");
		return;
	}

	printf("%10llu %10llu ", total, (unsigned long long)delta);
}
105
filter_event(void * data,const struct perf_dlfilter_sample * sample,void * ctx)106 int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
107 {
108 __s32 cpu = sample->cpu;
109 __s32 tid = sample->tid;
110 int pos;
111
112 pos = event_entry(sample->event);
113
114 if (cpu >= 0 && cpu < MAX_CPU) {
115 print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]);
116 cycles_rpt[cpu][pos] = cycles[cpu][pos];
117 return 0;
118 }
119
120 if (tid != -1) {
121 struct entry *e = find_entry(tid);
122
123 if (e) {
124 print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]);
125 e->cycles_rpt[pos] = e->cycles[pos];
126 return 0;
127 }
128 }
129
130 printf("%22s", "");
131 return 0;
132 }
133
/*
 * filter_description - describe this filter
 * @long_description: if not NULL, receives a pointer to a longer description
 *
 * Both strings have static storage duration; the caller must not modify or
 * free them.
 *
 * Return: a one-line description of the filter.
 */
const char *filter_description(const char **long_description)
{
	/* const: string literals must never be written through */
	static const char long_desc[] = "Cycle counts are accumulated per CPU (or "
		"per thread if CPU is not recorded) from IPC information, and "
		"printed together with the change since the last print, at the "
		"start of each line. Separate counts are kept for branches, "
		"instructions or other events.";

	/* Tolerate a caller that is only interested in the short description */
	if (long_description)
		*long_description = long_desc;

	return "Print the number of cycles at the start of each line";
}
145