1 // SPDX-License-Identifier: GPL-2.0
2 #include "util/cgroup.h"
3 #include "util/debug.h"
4 #include "util/evlist.h"
5 #include "util/hashmap.h"
6 #include "util/machine.h"
7 #include "util/map.h"
8 #include "util/symbol.h"
9 #include "util/target.h"
10 #include "util/thread.h"
11 #include "util/thread_map.h"
12 #include "util/lock-contention.h"
13 #include <linux/zalloc.h>
14 #include <linux/string.h>
15 #include <api/fs/fs.h>
16 #include <bpf/bpf.h>
17 #include <bpf/btf.h>
18 #include <inttypes.h>
19
20 #include "bpf_skel/lock_contention.skel.h"
21 #include "bpf_skel/lock_data.h"
22
23 static struct lock_contention_bpf *skel;
24 static bool has_slab_iter;
25 static struct hashmap slab_hash;
26
slab_cache_hash(long key,void * ctx __maybe_unused)27 static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
28 {
29 return key;
30 }
31
slab_cache_equal(long key1,long key2,void * ctx __maybe_unused)32 static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
33 {
34 return key1 == key2;
35 }
36
check_slab_cache_iter(struct lock_contention * con)37 static void check_slab_cache_iter(struct lock_contention *con)
38 {
39 s32 ret;
40
41 hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
42
43 con->btf = btf__load_vmlinux_btf();
44 if (con->btf == NULL) {
45 pr_debug("BTF loading failed: %m\n");
46 return;
47 }
48
49 ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
50 if (ret < 0) {
51 bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
52 pr_debug("slab cache iterator is not available: %d\n", ret);
53 return;
54 }
55
56 has_slab_iter = true;
57
58 bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
59 }
60
run_slab_cache_iter(void)61 static void run_slab_cache_iter(void)
62 {
63 int fd;
64 char buf[256];
65 long key, *prev_key;
66
67 if (!has_slab_iter)
68 return;
69
70 fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
71 if (fd < 0) {
72 pr_debug("cannot create slab cache iter: %d\n", fd);
73 return;
74 }
75
76 /* This will run the bpf program */
77 while (read(fd, buf, sizeof(buf)) > 0)
78 continue;
79
80 close(fd);
81
82 /* Read the slab cache map and build a hash with IDs */
83 fd = bpf_map__fd(skel->maps.slab_caches);
84 prev_key = NULL;
85 while (!bpf_map_get_next_key(fd, prev_key, &key)) {
86 struct slab_cache_data *data;
87
88 data = malloc(sizeof(*data));
89 if (data == NULL)
90 break;
91
92 if (bpf_map_lookup_elem(fd, &key, data) < 0)
93 break;
94
95 hashmap__add(&slab_hash, data->id, data);
96 prev_key = &key;
97 }
98 }
99
exit_slab_cache_iter(void)100 static void exit_slab_cache_iter(void)
101 {
102 struct hashmap_entry *cur;
103 unsigned bkt;
104
105 hashmap__for_each_entry(&slab_hash, cur, bkt)
106 free(cur->pvalue);
107
108 hashmap__clear(&slab_hash);
109 }
110
/*
 * Fill in the skeleton's rodata with what the BPF side needs to walk
 * NUMA node data: the size of 'struct zone', the address of either
 * contig_page_data (UMA) or the node_data[] array (NUMA), and the
 * number of nodes.  Requires con->btf already loaded (by
 * check_slab_cache_iter()); silently does nothing otherwise.
 */
static void init_numa_data(struct lock_contention *con)
{
	struct symbol *sym;
	struct map *kmap;
	char *buf = NULL, *p;
	size_t len;
	long last = -1;
	int ret;

	if (!con->btf)
		return;

	/*
	 * 'struct zone' is embedded in 'struct pglist_data' as an array.
	 * As we may not have full information of the struct zone in the
	 * (fake) vmlinux.h, let's get the actual size from BTF.
	 */
	ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT);
	if (ret < 0) {
		pr_debug("cannot get type of struct zone: %d\n", ret);
		return;
	}

	/* 'ret' is the BTF type id here; resolve it to a byte size */
	ret = btf__resolve_size(con->btf, ret);
	if (ret < 0) {
		pr_debug("cannot get size of struct zone: %d\n", ret);
		return;
	}
	skel->rodata->sizeof_zone = ret;

	/* UMA system doesn't have 'node_data[]' - just use contig_page_data. */
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "contig_page_data",
						  &kmap);
	if (sym) {
		skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start);
		map__put(kmap);
		return;
	}

	/*
	 * The 'node_data' is an array of pointers to struct pglist_data.
	 * It needs to follow the pointer for each node in BPF to get the
	 * address of struct pglist_data and its zones.
	 */
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "node_data",
						  &kmap);
	if (sym == NULL)
		return;

	skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start);
	map__put(kmap);

	/* get the number of online nodes using the last node number + 1 */
	ret = sysfs__read_str("devices/system/node/online", &buf, &len);
	if (ret < 0) {
		pr_debug("failed to read online node: %d\n", ret);
		return;
	}

	/*
	 * Parse a list like "0-3,8\n": keep the last number seen;
	 * ',' '-' and '\n' are the only separators skipped between values.
	 */
	p = buf;
	while (p && *p) {
		last = strtol(p, &p, 0);

		if (p && (*p == ',' || *p == '-' || *p == '\n'))
			p++;
	}
	skel->rodata->nr_nodes = last + 1;
	free(buf);
}
182
lock_contention_prepare(struct lock_contention * con)183 int lock_contention_prepare(struct lock_contention *con)
184 {
185 int i, fd;
186 int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
187 struct evlist *evlist = con->evlist;
188 struct target *target = con->target;
189
190 /* make sure it loads the kernel map before lookup */
191 map__load(machine__kernel_map(con->machine));
192
193 skel = lock_contention_bpf__open();
194 if (!skel) {
195 pr_err("Failed to open lock-contention BPF skeleton\n");
196 return -1;
197 }
198
199 bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
200 bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
201 bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
202
203 if (con->aggr_mode == LOCK_AGGR_TASK)
204 bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
205 else
206 bpf_map__set_max_entries(skel->maps.task_data, 1);
207
208 if (con->save_callstack) {
209 bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
210 if (con->owner) {
211 bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
212 bpf_map__set_key_size(skel->maps.owner_stacks,
213 con->max_stack * sizeof(u64));
214 bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
215 bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
216 bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
217 skel->rodata->max_stack = con->max_stack;
218 }
219 } else {
220 bpf_map__set_max_entries(skel->maps.stacks, 1);
221 }
222
223 if (target__has_cpu(target)) {
224 skel->rodata->has_cpu = 1;
225 ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
226 }
227 if (target__has_task(target)) {
228 skel->rodata->has_task = 1;
229 ntasks = perf_thread_map__nr(evlist->core.threads);
230 }
231 if (con->filters->nr_types) {
232 skel->rodata->has_type = 1;
233 ntypes = con->filters->nr_types;
234 }
235 if (con->filters->nr_cgrps) {
236 skel->rodata->has_cgroup = 1;
237 ncgrps = con->filters->nr_cgrps;
238 }
239
240 /* resolve lock name filters to addr */
241 if (con->filters->nr_syms) {
242 struct symbol *sym;
243 struct map *kmap;
244 unsigned long *addrs;
245
246 for (i = 0; i < con->filters->nr_syms; i++) {
247 sym = machine__find_kernel_symbol_by_name(con->machine,
248 con->filters->syms[i],
249 &kmap);
250 if (sym == NULL) {
251 pr_warning("ignore unknown symbol: %s\n",
252 con->filters->syms[i]);
253 continue;
254 }
255
256 addrs = realloc(con->filters->addrs,
257 (con->filters->nr_addrs + 1) * sizeof(*addrs));
258 if (addrs == NULL) {
259 pr_warning("memory allocation failure\n");
260 continue;
261 }
262
263 addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start);
264 con->filters->addrs = addrs;
265 }
266 naddrs = con->filters->nr_addrs;
267 skel->rodata->has_addr = 1;
268 }
269
270 /* resolve lock name in delays */
271 if (con->nr_delays) {
272 struct symbol *sym;
273 struct map *kmap;
274
275 for (i = 0; i < con->nr_delays; i++) {
276 sym = machine__find_kernel_symbol_by_name(con->machine,
277 con->delays[i].sym,
278 &kmap);
279 if (sym == NULL) {
280 pr_warning("ignore unknown symbol: %s\n",
281 con->delays[i].sym);
282 continue;
283 }
284
285 con->delays[i].addr = map__unmap_ip(kmap, sym->start);
286 }
287 skel->rodata->lock_delay = 1;
288 bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays);
289 }
290
291 bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
292 bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
293 bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
294 bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
295 bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
296
297 skel->rodata->stack_skip = con->stack_skip;
298 skel->rodata->aggr_mode = con->aggr_mode;
299 skel->rodata->needs_callstack = con->save_callstack;
300 skel->rodata->lock_owner = con->owner;
301
302 if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
303 if (cgroup_is_v2("perf_event"))
304 skel->rodata->use_cgroup_v2 = 1;
305 }
306
307 check_slab_cache_iter(con);
308
309 if (con->filters->nr_slabs && has_slab_iter) {
310 skel->rodata->has_slab = 1;
311 nslabs = con->filters->nr_slabs;
312 }
313
314 bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);
315
316 init_numa_data(con);
317
318 if (lock_contention_bpf__load(skel) < 0) {
319 pr_err("Failed to load lock-contention BPF skeleton\n");
320 return -1;
321 }
322
323 if (target__has_cpu(target)) {
324 u32 cpu;
325 u8 val = 1;
326
327 fd = bpf_map__fd(skel->maps.cpu_filter);
328
329 for (i = 0; i < ncpus; i++) {
330 cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
331 bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
332 }
333 }
334
335 if (target__has_task(target)) {
336 u32 pid;
337 u8 val = 1;
338
339 fd = bpf_map__fd(skel->maps.task_filter);
340
341 for (i = 0; i < ntasks; i++) {
342 pid = perf_thread_map__pid(evlist->core.threads, i);
343 bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
344 }
345 }
346
347 if (target__none(target) && evlist->workload.pid > 0) {
348 u32 pid = evlist->workload.pid;
349 u8 val = 1;
350
351 fd = bpf_map__fd(skel->maps.task_filter);
352 bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
353 }
354
355 if (con->filters->nr_types) {
356 u8 val = 1;
357
358 fd = bpf_map__fd(skel->maps.type_filter);
359
360 for (i = 0; i < con->filters->nr_types; i++)
361 bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY);
362 }
363
364 if (con->filters->nr_addrs) {
365 u8 val = 1;
366
367 fd = bpf_map__fd(skel->maps.addr_filter);
368
369 for (i = 0; i < con->filters->nr_addrs; i++)
370 bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY);
371 }
372
373 if (con->filters->nr_cgrps) {
374 u8 val = 1;
375
376 fd = bpf_map__fd(skel->maps.cgroup_filter);
377
378 for (i = 0; i < con->filters->nr_cgrps; i++)
379 bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
380 }
381
382 if (con->nr_delays) {
383 fd = bpf_map__fd(skel->maps.lock_delays);
384
385 for (i = 0; i < con->nr_delays; i++)
386 bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY);
387 }
388
389 if (con->aggr_mode == LOCK_AGGR_CGROUP)
390 read_all_cgroups(&con->cgroups);
391
392 bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
393
394 lock_contention_bpf__attach(skel);
395
396 /* run the slab iterator after attaching */
397 run_slab_cache_iter();
398
399 if (con->filters->nr_slabs) {
400 u8 val = 1;
401 int cache_fd;
402 long key, *prev_key;
403
404 fd = bpf_map__fd(skel->maps.slab_filter);
405
406 /* Read the slab cache map and build a hash with its address */
407 cache_fd = bpf_map__fd(skel->maps.slab_caches);
408 prev_key = NULL;
409 while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
410 struct slab_cache_data data;
411
412 if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
413 break;
414
415 for (i = 0; i < con->filters->nr_slabs; i++) {
416 if (!strcmp(con->filters->slabs[i], data.name)) {
417 bpf_map_update_elem(fd, &key, &val, BPF_ANY);
418 break;
419 }
420 }
421 prev_key = &key;
422 }
423 }
424
425 return 0;
426 }
427
428 /*
429 * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end
430 * timestamp in ktime so that it can calculate delta easily.
431 */
mark_end_timestamp(void)432 static void mark_end_timestamp(void)
433 {
434 DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
435 .flags = BPF_F_TEST_RUN_ON_CPU,
436 );
437 int prog_fd = bpf_program__fd(skel->progs.end_timestamp);
438
439 bpf_prog_test_run_opts(prog_fd, &opts);
440 }
441
/*
 * Fold one still-pending contention (a tstamp entry that never saw the
 * matching lock:contention_end event) into the lock_stat map, treating
 * @end_ts as the contention end time.  Only existing stat entries are
 * updated (BPF_EXIST); entries with no prior stats are left alone.
 */
static void update_lock_stat(int map_fd, int pid, u64 end_ts,
			     enum lock_aggr_mode aggr_mode,
			     struct tstamp_data *ts_data)
{
	struct contention_key stat_key = {};
	struct contention_data stat_data;
	u64 duration;

	/* entries started at or after the end timestamp contribute nothing */
	if (end_ts <= ts_data->timestamp)
		return;

	duration = end_ts - ts_data->timestamp;

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		stat_key.stack_id = ts_data->stack_id;
		break;
	case LOCK_AGGR_TASK:
		stat_key.pid = pid;
		break;
	case LOCK_AGGR_ADDR:
		stat_key.lock_addr_or_cgroup = ts_data->lock;
		break;
	case LOCK_AGGR_CGROUP:
		/* TODO */
		return;
	default:
		return;
	}

	if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0)
		return;

	stat_data.count++;
	stat_data.total_time += duration;
	stat_data.max_time = duration > stat_data.max_time ? duration : stat_data.max_time;
	stat_data.min_time = duration < stat_data.min_time ? duration : stat_data.min_time;

	bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST);
}
485
486 /*
487 * Account entries in the tstamp map (which didn't see the corresponding
488 * lock:contention_end tracepoint) using end_ts.
489 */
account_end_timestamp(struct lock_contention * con)490 static void account_end_timestamp(struct lock_contention *con)
491 {
492 int ts_fd, stat_fd;
493 int *prev_key, key;
494 u64 end_ts = skel->bss->end_ts;
495 int total_cpus;
496 enum lock_aggr_mode aggr_mode = con->aggr_mode;
497 struct tstamp_data ts_data, *cpu_data;
498
499 /* Iterate per-task tstamp map (key = TID) */
500 ts_fd = bpf_map__fd(skel->maps.tstamp);
501 stat_fd = bpf_map__fd(skel->maps.lock_stat);
502
503 prev_key = NULL;
504 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
505 if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) {
506 int pid = key;
507
508 if (aggr_mode == LOCK_AGGR_TASK && con->owner)
509 pid = ts_data.flags;
510
511 update_lock_stat(stat_fd, pid, end_ts, aggr_mode,
512 &ts_data);
513 }
514
515 prev_key = &key;
516 }
517
518 /* Now it'll check per-cpu tstamp map which doesn't have TID. */
519 if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP)
520 return;
521
522 total_cpus = cpu__max_cpu().cpu;
523 ts_fd = bpf_map__fd(skel->maps.tstamp_cpu);
524
525 cpu_data = calloc(total_cpus, sizeof(*cpu_data));
526 if (cpu_data == NULL)
527 return;
528
529 prev_key = NULL;
530 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
531 if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0)
532 goto next;
533
534 for (int i = 0; i < total_cpus; i++) {
535 if (cpu_data[i].lock == 0)
536 continue;
537
538 update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
539 &cpu_data[i]);
540 }
541
542 next:
543 prev_key = &key;
544 }
545 free(cpu_data);
546 }
547
lock_contention_start(void)548 int lock_contention_start(void)
549 {
550 skel->bss->enabled = 1;
551 return 0;
552 }
553
lock_contention_stop(void)554 int lock_contention_stop(void)
555 {
556 skel->bss->enabled = 0;
557 mark_end_timestamp();
558 return 0;
559 }
560
/*
 * Resolve a display name for one contention entry, depending on the
 * aggregation mode: task comm, lock address symbol / slab cache name,
 * cgroup name, or the first non-lock-internal caller in the stack.
 * May return a pointer into a static buffer (not reentrant).
 */
static const char *lock_contention_get_name(struct lock_contention *con,
					    struct contention_key *key,
					    u64 *stack_trace, u32 flags)
{
	int idx = 0;
	u64 addr;
	static char name_buf[KSYM_NAME_LEN];
	struct symbol *sym;
	struct map *kmap;
	struct machine *machine = con->machine;

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		struct contention_task_data task;
		int pid = key->pid;
		int task_fd = bpf_map__fd(skel->maps.task_data);

		/* do not update idle comm which contains CPU number */
		if (pid) {
			struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);

			if (t != NULL &&
			    !bpf_map_lookup_elem(task_fd, &pid, &task) &&
			    thread__set_comm(t, task.comm, /*timestamp=*/0)) {
				snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
				return name_buf;
			}
		}
		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		int lock_fd = bpf_map__fd(skel->maps.lock_syms);
		struct slab_cache_data *slab_data;

		/* per-process locks set upper bits of the flags */
		if (flags & LCD_F_MMAP_LOCK)
			return "mmap_lock";
		if (flags & LCD_F_SIGHAND_LOCK)
			return "siglock";

		/* global locks with symbols */
		sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap);
		if (sym)
			return sym->name;

		/*
		 * Try semi-global locks collected separately.  One lookup
		 * suffices; the old code looked up the same key twice,
		 * once per lock class.
		 */
		if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
			if (flags == LOCK_CLASS_RQLOCK)
				return "rq_lock";
			if (flags == LOCK_CLASS_ZONE_LOCK)
				return "zone_lock";
		}

		/* look slab_hash for dynamic locks in a slab object */
		if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
			snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
			return name_buf;
		}

		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_CGROUP) {
		u64 cgrp_id = key->lock_addr_or_cgroup;
		struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id);

		if (cgrp)
			return cgrp->name;

		snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
		return name_buf;
	}

	/* LOCK_AGGR_CALLER: skip lock internal functions */
	while (machine__is_lock_function(machine, stack_trace[idx]) &&
	       idx < con->max_stack - 1)
		idx++;

	addr = stack_trace[idx];
	sym = machine__find_kernel_symbol(machine, addr, &kmap);

	if (sym) {
		unsigned long offset;

		offset = map__map_ip(kmap, addr) - sym->start;

		if (offset == 0)
			return sym->name;

		snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
	}

	return name_buf;
}
660
pop_owner_stack_trace(struct lock_contention * con)661 struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
662 {
663 int stacks_fd, stat_fd;
664 u64 *stack_trace = NULL;
665 s32 stack_id;
666 struct contention_key ckey = {};
667 struct contention_data cdata = {};
668 size_t stack_size = con->max_stack * sizeof(*stack_trace);
669 struct lock_stat *st = NULL;
670
671 stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
672 stat_fd = bpf_map__fd(skel->maps.owner_stat);
673 if (!stacks_fd || !stat_fd)
674 goto out_err;
675
676 stack_trace = zalloc(stack_size);
677 if (stack_trace == NULL)
678 goto out_err;
679
680 if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
681 goto out_err;
682
683 bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
684 ckey.stack_id = stack_id;
685 bpf_map_lookup_elem(stat_fd, &ckey, &cdata);
686
687 st = zalloc(sizeof(struct lock_stat));
688 if (!st)
689 goto out_err;
690
691 st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
692 "unknown");
693 if (!st->name)
694 goto out_err;
695
696 st->flags = cdata.flags;
697 st->nr_contended = cdata.count;
698 st->wait_time_total = cdata.total_time;
699 st->wait_time_max = cdata.max_time;
700 st->wait_time_min = cdata.min_time;
701 st->callstack = stack_trace;
702
703 if (cdata.count)
704 st->avg_wait_time = cdata.total_time / cdata.count;
705
706 bpf_map_delete_elem(stacks_fd, stack_trace);
707 bpf_map_delete_elem(stat_fd, &ckey);
708
709 return st;
710
711 out_err:
712 free(stack_trace);
713 free(st);
714
715 return NULL;
716 }
717
/*
 * Drain the lock_stat BPF map into userspace lock_stat objects,
 * merging entries that share the same aggregation key.  Also copies the
 * BPF-side failure counters and accounts not-yet-finished contentions.
 * Returns 0 on success, -1 if the loop body failed part-way.
 */
int lock_contention_read(struct lock_contention *con)
{
	int fd, stack, err = 0;
	struct contention_key *prev_key, key = {};
	struct contention_data data = {};
	struct lock_stat *st = NULL;
	struct machine *machine = con->machine;
	u64 *stack_trace;
	size_t stack_size = con->max_stack * sizeof(*stack_trace);

	fd = bpf_map__fd(skel->maps.lock_stat);
	stack = bpf_map__fd(skel->maps.stacks);

	/* export BPF-side failure counters */
	con->fails.task = skel->bss->task_fail;
	con->fails.stack = skel->bss->stack_fail;
	con->fails.time = skel->bss->time_fail;
	con->fails.data = skel->bss->data_fail;

	stack_trace = zalloc(stack_size);
	if (stack_trace == NULL)
		return -1;

	/* fold contentions that never saw contention_end into the stats */
	account_end_timestamp(con);

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		/* give the idle task a readable comm for the report */
		struct thread *idle = machine__findnew_thread(machine,
								/*pid=*/0,
								/*tid=*/0);
		thread__set_comm(idle, "swapper", /*timestamp=*/0);
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
			.flags = BPF_F_TEST_RUN_ON_CPU,
		);
		int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);

		/* run the lock-symbol collector once, on this CPU only */
		bpf_prog_test_run_opts(prog_fd, &opts);
	}

	prev_key = NULL;
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		s64 ls_key;
		const char *name;

		/* to handle errors in the loop body */
		err = -1;

		bpf_map_lookup_elem(fd, &key, &data);
		if (con->save_callstack) {
			bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);

			/* count entries dropped by the callstack filter */
			if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
				con->nr_filtered += data.count;
				goto next;
			}
		}

		/* choose the aggregation key for this entry */
		switch (con->aggr_mode) {
		case LOCK_AGGR_CALLER:
			ls_key = key.stack_id;
			break;
		case LOCK_AGGR_TASK:
			ls_key = key.pid;
			break;
		case LOCK_AGGR_ADDR:
		case LOCK_AGGR_CGROUP:
			ls_key = key.lock_addr_or_cgroup;
			break;
		default:
			goto next;
		}

		/* merge into an already-seen stat if one exists */
		st = lock_stat_find(ls_key);
		if (st != NULL) {
			st->wait_time_total += data.total_time;
			if (st->wait_time_max < data.max_time)
				st->wait_time_max = data.max_time;
			if (st->wait_time_min > data.min_time)
				st->wait_time_min = data.min_time;

			st->nr_contended += data.count;
			if (st->nr_contended)
				st->avg_wait_time = st->wait_time_total / st->nr_contended;
			goto next;
		}

		name = lock_contention_get_name(con, &key, stack_trace, data.flags);
		st = lock_stat_findnew(ls_key, name, data.flags);
		if (st == NULL)
			break;

		st->nr_contended = data.count;
		st->wait_time_total = data.total_time;
		st->wait_time_max = data.max_time;
		st->wait_time_min = data.min_time;

		if (data.count)
			st->avg_wait_time = data.total_time / data.count;

		/* callstacks are only kept in verbose caller mode */
		if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
			st->callstack = memdup(stack_trace, stack_size);
			if (st->callstack == NULL)
				break;
		}

next:
		prev_key = &key;

		/* we're fine now, reset the error */
		err = 0;
	}

	free(stack_trace);

	return err;
}
835
lock_contention_finish(struct lock_contention * con)836 int lock_contention_finish(struct lock_contention *con)
837 {
838 if (skel) {
839 skel->bss->enabled = 0;
840 lock_contention_bpf__destroy(skel);
841 }
842
843 while (!RB_EMPTY_ROOT(&con->cgroups)) {
844 struct rb_node *node = rb_first(&con->cgroups);
845 struct cgroup *cgrp = rb_entry(node, struct cgroup, node);
846
847 rb_erase(node, &con->cgroups);
848 cgroup__put(cgrp);
849 }
850
851 exit_slab_cache_iter();
852 btf__free(con->btf);
853
854 return 0;
855 }
856