#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <bpf/bpf.h>
#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

static struct func_latency_bpf *skel;

int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func = NULL;

	/* function mode: exactly one target function is allowed */
	if (!list_empty(&ftrace->filters)) {
		if (!list_is_singular(&ftrace->filters)) {
			pr_err("ERROR: Too many target functions.\n");
			return -1;
		}
		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
	} else {
		/* event mode: a begin/end pair of events is required */
		int count = 0;
		struct list_head *pos;

		list_for_each(pos, &ftrace->event_pair)
			count++;

		if (count != 2) {
			pr_err("ERROR: Needs two target events.\n");
			return -1;
		}
	}

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

	/* pass the histogram settings to the BPF program */
	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

	/* fill the cpu filter map with the requested cpus */
	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	/* fill the task filter map with the requested pids */
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	/* initialize the minimum so that the first sample always updates it */
	skel->bss->min = INT64_MAX;

	if (func) {
		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
								    false, func->name);
		if (IS_ERR(skel->links.func_begin)) {
			pr_err("Failed to attach fentry program\n");
			err = PTR_ERR(skel->links.func_begin);
			goto out;
		}

		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
								  true, func->name);
		if (IS_ERR(skel->links.func_end)) {
			pr_err("Failed to attach fexit program\n");
			err = PTR_ERR(skel->links.func_end);
			goto out;
		}
	} else {
		struct filter_entry *event;

		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);

		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
									      event->name);
		if (IS_ERR(skel->links.event_begin)) {
			pr_err("Failed to attach first tracepoint program\n");
			err = PTR_ERR(skel->links.event_begin);
			goto out;
		}

		event = list_next_entry(event, list);

		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
									   event->name);
		if (IS_ERR(skel->links.event_end)) {
			pr_err("Failed to attach second tracepoint program\n");
			err = PTR_ERR(skel->links.event_end);
			goto out;
		}
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

	/* the latency map is a per-cpu array: sum each bucket across CPUs */
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

		/* convert from nsec to usec unless nanoseconds were requested */
		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}