#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

static struct func_latency_bpf *skel;

int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func = NULL;

	/*
	 * A single function filter means kprobe/kretprobe attachment below;
	 * otherwise exactly two events are required to mark begin and end.
	 */
	if (!list_empty(&ftrace->filters)) {
		if (!list_is_singular(&ftrace->filters)) {
			pr_err("ERROR: Too many target functions.\n");
			return -1;
		}
		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
	} else {
		int count = 0;
		struct list_head *pos;

		list_for_each(pos, &ftrace->event_pair)
			count++;

		if (count != 2) {
			pr_err("ERROR: Needs two target events.\n");
			return -1;
		}
	}

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	/* start with the largest possible min so the first sample updates it */
	skel->bss->min = INT64_MAX;

	if (func) {
		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
								    false, func->name);
		if (IS_ERR(skel->links.func_begin)) {
			pr_err("Failed to attach fentry program\n");
			err = PTR_ERR(skel->links.func_begin);
			goto out;
		}

		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
								  true, func->name);
		if (IS_ERR(skel->links.func_end)) {
			pr_err("Failed to attach fexit program\n");
			err = PTR_ERR(skel->links.func_end);
			goto out;
		}
	} else {
		struct filter_entry *event;

		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);

		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
									      event->name);
		if (IS_ERR(skel->links.event_begin)) {
			pr_err("Failed to attach first tracepoint program\n");
			err = PTR_ERR(skel->links.event_begin);
			goto out;
		}

		event = list_next_entry(event, list);

		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
									   event->name);
		if (IS_ERR(skel->links.event_end)) {
			pr_err("Failed to attach second tracepoint program\n");
			err = PTR_ERR(skel->links.event_end);
			goto out;
		}
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

	/* each bucket lookup returns one value per possible CPU; sum them up */
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

		/* convert nsec to usec unless nanosecond output was requested */
		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}
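
/*
 * Usage sketch (illustration only, kept out of the build with #if 0): the
 * ftrace latency mode is expected to drive the hooks above roughly in the
 * order shown below. The driver function latency_bpf_example() is
 * hypothetical; the real caller also polls the fd returned by prepare and
 * handles signals and option parsing.
 */
#if 0
static int latency_bpf_example(struct perf_ftrace *ftrace,
			       int buckets[], struct stats *stats)
{
	int fd = perf_ftrace__latency_prepare_bpf(ftrace);

	if (fd < 0)
		return fd;

	perf_ftrace__latency_start_bpf(ftrace);
	/* ... run the workload, or poll(fd) until interrupted ... */
	perf_ftrace__latency_stop_bpf(ftrace);

	perf_ftrace__latency_read_bpf(ftrace, buckets, stats);
	perf_ftrace__latency_cleanup_bpf(ftrace);
	close(fd);	/* would need <unistd.h> */
	return 0;
}
#endif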