#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <bpf/bpf.h>
#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

/* BPF skeleton for the function latency histogram */
static struct func_latency_bpf *skel;

int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func = NULL;

	if (!list_empty(&ftrace->filters)) {
		if (!list_is_singular(&ftrace->filters)) {
			pr_err("ERROR: Too many target functions.\n");
			return -1;
		}
		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
	} else {
		int count = 0;
		struct list_head *pos;

		list_for_each(pos, &ftrace->event_pair)
			count++;

		if (count != 2) {
			pr_err("ERROR: Needs two target events.\n");
			return -1;
		}
	}

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

	/* pass histogram settings (bucket range/count, minimum latency) to the BPF program */
	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

	/* also filter by task unless this is a pure system-wide/cpu target */
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

	/* BPF maps may need more locked memory than the default rlimit allows */
	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

	/* fill the cpu filter map with the requested CPUs */
	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	/* fill the task filter map with the target PIDs/TIDs */
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	/* initialize min so the first sample always updates it */
	skel->bss->min = INT64_MAX;

	if (func) {
		/* attach kprobe/kretprobe to the single target function */
		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
								    false, func->name);
		if (IS_ERR(skel->links.func_begin)) {
			pr_err("Failed to attach fentry program\n");
			err = PTR_ERR(skel->links.func_begin);
			goto out;
		}

		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
								  true, func->name);
		if (IS_ERR(skel->links.func_end)) {
			pr_err("Failed to attach fexit program\n");
			err = PTR_ERR(skel->links.func_end);
			goto out;
		}
	} else {
		/* attach raw tracepoints to the begin/end event pair */
		struct filter_entry *event;

		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);

		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
									      event->name);
		if (IS_ERR(skel->links.event_begin)) {
			pr_err("Failed to attach first tracepoint program\n");
			err = PTR_ERR(skel->links.event_begin);
			goto out;
		}

		event = list_next_entry(event, list);

		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
									   event->name);
		if (IS_ERR(skel->links.event_end)) {
			pr_err("Failed to attach second tracepoint program\n");
			err = PTR_ERR(skel->links.event_end);
			goto out;
		}
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

	/* the latency map is per-CPU: sum each bucket across all CPUs */
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

		/* the BPF program records nanoseconds; convert to usec unless requested otherwise */
		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}
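
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * helpers above are driven by the ftrace latency subcommand roughly as
 * below.  The struct perf_ftrace setup and the wait loop live in the
 * caller (builtin-ftrace.c); the bucket count and variable names here are
 * assumptions for illustration.
 *
 *	int buckets[NUM_BUCKET] = {};	// sized to match bucket_num
 *	struct stats stats;
 *	int fd;
 *
 *	init_stats(&stats);
 *	fd = perf_ftrace__latency_prepare_bpf(ftrace);	// open, load and attach the skeleton
 *	perf_ftrace__latency_start_bpf(ftrace);		// enable collection
 *	// ... run the workload, poll(fd) or sleep ...
 *	perf_ftrace__latency_stop_bpf(ftrace);		// disable collection
 *	perf_ftrace__latency_read_bpf(ftrace, buckets, &stats);
 *	perf_ftrace__latency_cleanup_bpf(ftrace);	// destroy the skeleton
 *	close(fd);
 */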