1 // SPDX-License-Identifier: GPL-2.0 2 #include "util/cgroup.h" 3 #include "util/debug.h" 4 #include "util/evlist.h" 5 #include "util/hashmap.h" 6 #include "util/machine.h" 7 #include "util/map.h" 8 #include "util/symbol.h" 9 #include "util/target.h" 10 #include "util/thread.h" 11 #include "util/thread_map.h" 12 #include "util/lock-contention.h" 13 #include <linux/zalloc.h> 14 #include <linux/string.h> 15 #include <api/fs/fs.h> 16 #include <bpf/bpf.h> 17 #include <bpf/btf.h> 18 #include <inttypes.h> 19 20 #include "bpf_skel/lock_contention.skel.h" 21 #include "bpf_skel/lock_data.h" 22 23 static struct lock_contention_bpf *skel; 24 static bool has_slab_iter; 25 static struct hashmap slab_hash; 26 27 static size_t slab_cache_hash(long key, void *ctx __maybe_unused) 28 { 29 return key; 30 } 31 32 static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) 33 { 34 return key1 == key2; 35 } 36 37 static void check_slab_cache_iter(struct lock_contention *con) 38 { 39 s32 ret; 40 41 hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); 42 43 con->btf = btf__load_vmlinux_btf(); 44 if (con->btf == NULL) { 45 pr_debug("BTF loading failed: %m\n"); 46 return; 47 } 48 49 ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); 50 if (ret < 0) { 51 bpf_program__set_autoload(skel->progs.slab_cache_iter, false); 52 pr_debug("slab cache iterator is not available: %d\n", ret); 53 return; 54 } 55 56 has_slab_iter = true; 57 58 bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); 59 } 60 61 static void run_slab_cache_iter(void) 62 { 63 int fd; 64 char buf[256]; 65 long key, *prev_key; 66 67 if (!has_slab_iter) 68 return; 69 70 fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter)); 71 if (fd < 0) { 72 pr_debug("cannot create slab cache iter: %d\n", fd); 73 return; 74 } 75 76 /* This will run the bpf program */ 77 while (read(fd, buf, sizeof(buf)) > 0) 78 continue; 79 80 close(fd); 81 82 /* Read the slab cache map and build a hash with IDs */ 83 fd = bpf_map__fd(skel->maps.slab_caches); 84 prev_key = NULL; 85 while (!bpf_map_get_next_key(fd, prev_key, &key)) { 86 struct slab_cache_data *data; 87 88 data = malloc(sizeof(*data)); 89 if (data == NULL) 90 break; 91 92 if (bpf_map_lookup_elem(fd, &key, data) < 0) 93 break; 94 95 hashmap__add(&slab_hash, data->id, data); 96 prev_key = &key; 97 } 98 } 99 100 static void exit_slab_cache_iter(void) 101 { 102 struct hashmap_entry *cur; 103 unsigned bkt; 104 105 hashmap__for_each_entry(&slab_hash, cur, bkt) 106 free(cur->pvalue); 107 108 hashmap__clear(&slab_hash); 109 } 110 111 static void init_numa_data(struct lock_contention *con) 112 { 113 struct symbol *sym; 114 struct map *kmap; 115 char *buf = NULL, *p; 116 size_t len; 117 long last = -1; 118 int ret; 119 120 if (!con->btf) 121 return; 122 123 /* 124 * 'struct zone' is embedded in 'struct pglist_data' as an array. 125 * As we may not have full information of the struct zone in the 126 * (fake) vmlinux.h, let's get the actual size from BTF. 127 */ 128 ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); 129 if (ret < 0) { 130 pr_debug("cannot get type of struct zone: %d\n", ret); 131 return; 132 } 133 134 ret = btf__resolve_size(con->btf, ret); 135 if (ret < 0) { 136 pr_debug("cannot get size of struct zone: %d\n", ret); 137 return; 138 } 139 skel->rodata->sizeof_zone = ret; 140 141 /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */ 142 sym = machine__find_kernel_symbol_by_name(con->machine, 143 "contig_page_data", 144 &kmap); 145 if (sym) { 146 skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); 147 map__put(kmap); 148 return; 149 } 150 151 /* 152 * The 'node_data' is an array of pointers to struct pglist_data. 153 * It needs to follow the pointer for each node in BPF to get the 154 * address of struct pglist_data and its zones. 155 */ 156 sym = machine__find_kernel_symbol_by_name(con->machine, 157 "node_data", 158 &kmap); 159 if (sym == NULL) 160 return; 161 162 skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); 163 map__put(kmap); 164 165 /* get the number of online nodes using the last node number + 1 */ 166 ret = sysfs__read_str("devices/system/node/online", &buf, &len); 167 if (ret < 0) { 168 pr_debug("failed to read online node: %d\n", ret); 169 return; 170 } 171 172 p = buf; 173 while (p && *p) { 174 last = strtol(p, &p, 0); 175 176 if (p && (*p == ',' || *p == '-' || *p == '\n')) 177 p++; 178 } 179 skel->rodata->nr_nodes = last + 1; 180 free(buf); 181 } 182 183 int lock_contention_prepare(struct lock_contention *con) 184 { 185 int i, fd; 186 int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1; 187 struct evlist *evlist = con->evlist; 188 struct target *target = con->target; 189 190 /* make sure it loads the kernel map before lookup */ 191 map__load(machine__kernel_map(con->machine)); 192 193 skel = lock_contention_bpf__open(); 194 if (!skel) { 195 pr_err("Failed to open lock-contention BPF skeleton\n"); 196 return -1; 197 } 198 199 bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64)); 200 bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); 201 bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); 202 203 if (con->aggr_mode == LOCK_AGGR_TASK) 204 bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries); 205 else 206 bpf_map__set_max_entries(skel->maps.task_data, 1); 207 208 if (con->save_callstack) { 209 bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); 210 if (con->owner) { 211 bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64)); 212 bpf_map__set_key_size(skel->maps.owner_stacks, 213 con->max_stack * sizeof(u64)); 214 bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries); 215 bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries); 216 bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries); 217 skel->rodata->max_stack = con->max_stack; 218 } 219 } else { 220 bpf_map__set_max_entries(skel->maps.stacks, 1); 221 } 222 223 if (target__has_cpu(target)) { 224 skel->rodata->has_cpu = 1; 225 ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); 226 } 227 if (target__has_task(target)) { 228 skel->rodata->has_task = 1; 229 ntasks = perf_thread_map__nr(evlist->core.threads); 230 } 231 if (con->filters->nr_types) { 232 skel->rodata->has_type = 1; 233 ntypes = con->filters->nr_types; 234 } 235 if (con->filters->nr_cgrps) { 236 skel->rodata->has_cgroup = 1; 237 ncgrps = con->filters->nr_cgrps; 238 } 239 240 /* resolve lock name filters to addr */ 241 if (con->filters->nr_syms) { 242 struct symbol *sym; 243 struct map *kmap; 244 unsigned long *addrs; 245 246 for (i = 0; i < con->filters->nr_syms; i++) { 247 sym = machine__find_kernel_symbol_by_name(con->machine, 248 con->filters->syms[i], 249 &kmap); 250 if (sym == NULL) { 251 pr_warning("ignore unknown symbol: %s\n", 252 con->filters->syms[i]); 253 continue; 254 } 255 256 addrs = realloc(con->filters->addrs, 257 (con->filters->nr_addrs + 1) * sizeof(*addrs)); 258 if (addrs == NULL) { 259 pr_warning("memory allocation failure\n"); 260 continue; 261 } 262 263 addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start); 264 con->filters->addrs = addrs; 265 } 266 naddrs = con->filters->nr_addrs; 267 skel->rodata->has_addr = 1; 268 } 269 270 /* resolve lock name in delays */ 271 if (con->nr_delays) { 272 struct symbol *sym; 273 struct map *kmap; 274 275 for (i = 0; i < con->nr_delays; i++) { 276 sym = machine__find_kernel_symbol_by_name(con->machine, 277 con->delays[i].sym, 278 &kmap); 279 if (sym == NULL) { 280 pr_warning("ignore unknown symbol: %s\n", 281 con->delays[i].sym); 282 continue; 283 } 284 285 con->delays[i].addr = map__unmap_ip(kmap, sym->start); 286 } 287 skel->rodata->lock_delay = 1; 288 bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); 289 } 290 291 bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); 292 bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 293 bpf_map__set_max_entries(skel->maps.type_filter, ntypes); 294 bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); 295 bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); 296 297 skel->rodata->stack_skip = con->stack_skip; 298 skel->rodata->aggr_mode = con->aggr_mode; 299 skel->rodata->needs_callstack = con->save_callstack; 300 skel->rodata->lock_owner = con->owner; 301 302 if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) { 303 if (cgroup_is_v2("perf_event")) 304 skel->rodata->use_cgroup_v2 = 1; 305 } 306 307 check_slab_cache_iter(con); 308 309 if (con->filters->nr_slabs && has_slab_iter) { 310 skel->rodata->has_slab = 1; 311 nslabs = con->filters->nr_slabs; 312 } 313 314 bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); 315 316 init_numa_data(con); 317 318 if (lock_contention_bpf__load(skel) < 0) { 319 pr_err("Failed to load lock-contention BPF skeleton\n"); 320 return -1; 321 } 322 323 if (target__has_cpu(target)) { 324 u32 cpu; 325 u8 val = 1; 326 327 fd = bpf_map__fd(skel->maps.cpu_filter); 328 329 for (i = 0; i < ncpus; i++) { 330 cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu; 331 bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); 332 } 333 } 334 335 if (target__has_task(target)) { 336 u32 pid; 337 u8 val = 1; 338 339 fd = bpf_map__fd(skel->maps.task_filter); 340 341 for (i = 0; i < ntasks; i++) { 342 pid = perf_thread_map__pid(evlist->core.threads, i); 343 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 344 } 345 } 346 347 if (target__none(target) && evlist->workload.pid > 0) { 348 u32 pid = evlist->workload.pid; 349 u8 val = 1; 350 351 fd = bpf_map__fd(skel->maps.task_filter); 352 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 353 } 354 355 if (con->filters->nr_types) { 356 u8 val = 1; 357 358 fd = bpf_map__fd(skel->maps.type_filter); 359 360 for (i = 0; i < con->filters->nr_types; i++) 361 bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY); 362 } 363 364 if (con->filters->nr_addrs) { 365 u8 val = 1; 366 367 fd = bpf_map__fd(skel->maps.addr_filter); 368 369 for (i = 0; i < con->filters->nr_addrs; i++) 370 bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY); 371 } 372 373 if (con->filters->nr_cgrps) { 374 u8 val = 1; 375 376 fd = bpf_map__fd(skel->maps.cgroup_filter); 377 378 for (i = 0; i < con->filters->nr_cgrps; i++) 379 bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); 380 } 381 382 if (con->nr_delays) { 383 fd = bpf_map__fd(skel->maps.lock_delays); 384 385 for (i = 0; i < con->nr_delays; i++) 386 bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); 387 } 388 389 if (con->aggr_mode == LOCK_AGGR_CGROUP) 390 read_all_cgroups(&con->cgroups); 391 392 bpf_program__set_autoload(skel->progs.collect_lock_syms, false); 393 394 lock_contention_bpf__attach(skel); 395 396 /* run the slab iterator after attaching */ 397 run_slab_cache_iter(); 398 399 if (con->filters->nr_slabs) { 400 u8 val = 1; 401 int cache_fd; 402 long key, *prev_key; 403 404 fd = bpf_map__fd(skel->maps.slab_filter); 405 406 /* Read the slab cache map and build a hash with its address */ 407 cache_fd = bpf_map__fd(skel->maps.slab_caches); 408 prev_key = NULL; 409 while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) { 410 struct slab_cache_data data; 411 412 if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0) 413 break; 414 415 for (i = 0; i < con->filters->nr_slabs; i++) { 416 if (!strcmp(con->filters->slabs[i], data.name)) { 417 bpf_map_update_elem(fd, &key, &val, BPF_ANY); 418 break; 419 } 420 } 421 prev_key = &key; 422 } 423 } 424 425 return 0; 426 } 427 428 /* 429 * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end 430 * timestamp in ktime so that it can calculate delta easily. 431 */ 432 static void mark_end_timestamp(void) 433 { 434 DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 435 .flags = BPF_F_TEST_RUN_ON_CPU, 436 ); 437 int prog_fd = bpf_program__fd(skel->progs.end_timestamp); 438 439 bpf_prog_test_run_opts(prog_fd, &opts); 440 } 441 442 static void update_lock_stat(int map_fd, int pid, u64 end_ts, 443 enum lock_aggr_mode aggr_mode, 444 struct tstamp_data *ts_data) 445 { 446 u64 delta; 447 struct contention_key stat_key = {}; 448 struct contention_data stat_data; 449 450 if (ts_data->timestamp >= end_ts) 451 return; 452 453 delta = end_ts - ts_data->timestamp; 454 455 switch (aggr_mode) { 456 case LOCK_AGGR_CALLER: 457 stat_key.stack_id = ts_data->stack_id; 458 break; 459 case LOCK_AGGR_TASK: 460 stat_key.pid = pid; 461 break; 462 case LOCK_AGGR_ADDR: 463 stat_key.lock_addr_or_cgroup = ts_data->lock; 464 break; 465 case LOCK_AGGR_CGROUP: 466 /* TODO */ 467 return; 468 default: 469 return; 470 } 471 472 if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0) 473 return; 474 475 stat_data.total_time += delta; 476 stat_data.count++; 477 478 if (delta > stat_data.max_time) 479 stat_data.max_time = delta; 480 if (delta < stat_data.min_time) 481 stat_data.min_time = delta; 482 483 bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST); 484 } 485 486 /* 487 * Account entries in the tstamp map (which didn't see the corresponding 488 * lock:contention_end tracepoint) using end_ts. 489 */ 490 static void account_end_timestamp(struct lock_contention *con) 491 { 492 int ts_fd, stat_fd; 493 int *prev_key, key; 494 u64 end_ts = skel->bss->end_ts; 495 int total_cpus; 496 enum lock_aggr_mode aggr_mode = con->aggr_mode; 497 struct tstamp_data ts_data, *cpu_data; 498 499 /* Iterate per-task tstamp map (key = TID) */ 500 ts_fd = bpf_map__fd(skel->maps.tstamp); 501 stat_fd = bpf_map__fd(skel->maps.lock_stat); 502 503 prev_key = NULL; 504 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { 505 if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) { 506 int pid = key; 507 508 if (aggr_mode == LOCK_AGGR_TASK && con->owner) 509 pid = ts_data.flags; 510 511 update_lock_stat(stat_fd, pid, end_ts, aggr_mode, 512 &ts_data); 513 } 514 515 prev_key = &key; 516 } 517 518 /* Now it'll check per-cpu tstamp map which doesn't have TID. */ 519 if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP) 520 return; 521 522 total_cpus = cpu__max_cpu().cpu; 523 ts_fd = bpf_map__fd(skel->maps.tstamp_cpu); 524 525 cpu_data = calloc(total_cpus, sizeof(*cpu_data)); 526 if (cpu_data == NULL) 527 return; 528 529 prev_key = NULL; 530 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { 531 if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0) 532 goto next; 533 534 for (int i = 0; i < total_cpus; i++) { 535 if (cpu_data[i].lock == 0) 536 continue; 537 538 update_lock_stat(stat_fd, -1, end_ts, aggr_mode, 539 &cpu_data[i]); 540 } 541 542 next: 543 prev_key = &key; 544 } 545 free(cpu_data); 546 } 547 548 int lock_contention_start(void) 549 { 550 skel->bss->enabled = 1; 551 return 0; 552 } 553 554 int lock_contention_stop(void) 555 { 556 skel->bss->enabled = 0; 557 mark_end_timestamp(); 558 return 0; 559 } 560 561 static const char *lock_contention_get_name(struct lock_contention *con, 562 struct contention_key *key, 563 u64 *stack_trace, u32 flags) 564 { 565 int idx = 0; 566 u64 addr; 567 static char name_buf[KSYM_NAME_LEN]; 568 struct symbol *sym; 569 struct map *kmap; 570 struct machine *machine = con->machine; 571 572 if (con->aggr_mode == LOCK_AGGR_TASK) { 573 struct contention_task_data task; 574 int pid = key->pid; 575 int task_fd = bpf_map__fd(skel->maps.task_data); 576 577 /* do not update idle comm which contains CPU number */ 578 if (pid) { 579 struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid); 580 581 if (t != NULL && 582 !bpf_map_lookup_elem(task_fd, &pid, &task) && 583 thread__set_comm(t, task.comm, /*timestamp=*/0)) { 584 snprintf(name_buf, sizeof(name_buf), "%s", task.comm); 585 return name_buf; 586 } 587 } 588 return ""; 589 } 590 591 if (con->aggr_mode == LOCK_AGGR_ADDR) { 592 int lock_fd = bpf_map__fd(skel->maps.lock_syms); 593 struct slab_cache_data *slab_data; 594 595 /* per-process locks set upper bits of the flags */ 596 if (flags & LCD_F_MMAP_LOCK) 597 return "mmap_lock"; 598 if (flags & LCD_F_SIGHAND_LOCK) 599 return "siglock"; 600 601 /* global locks with symbols */ 602 sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap); 603 if (sym) 604 return sym->name; 605 606 /* try semi-global locks collected separately */ 607 if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { 608 if (flags == LOCK_CLASS_RQLOCK) 609 return "rq_lock"; 610 } 611 612 if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { 613 if (flags == LOCK_CLASS_ZONE_LOCK) 614 return "zone_lock"; 615 } 616 617 /* look slab_hash for dynamic locks in a slab object */ 618 if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { 619 snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); 620 return name_buf; 621 } 622 623 return ""; 624 } 625 626 if (con->aggr_mode == LOCK_AGGR_CGROUP) { 627 u64 cgrp_id = key->lock_addr_or_cgroup; 628 struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id); 629 630 if (cgrp) 631 return cgrp->name; 632 633 snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id); 634 return name_buf; 635 } 636 637 /* LOCK_AGGR_CALLER: skip lock internal functions */ 638 while (machine__is_lock_function(machine, stack_trace[idx]) && 639 idx < con->max_stack - 1) 640 idx++; 641 642 addr = stack_trace[idx]; 643 sym = machine__find_kernel_symbol(machine, addr, &kmap); 644 645 if (sym) { 646 unsigned long offset; 647 648 offset = map__map_ip(kmap, addr) - sym->start; 649 650 if (offset == 0) 651 return sym->name; 652 653 snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset); 654 } else { 655 snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr); 656 } 657 658 return name_buf; 659 } 660 661 struct lock_stat *pop_owner_stack_trace(struct lock_contention *con) 662 { 663 int stacks_fd, stat_fd; 664 u64 *stack_trace = NULL; 665 s32 stack_id; 666 struct contention_key ckey = {}; 667 struct contention_data cdata = {}; 668 size_t stack_size = con->max_stack * sizeof(*stack_trace); 669 struct lock_stat *st = NULL; 670 671 stacks_fd = bpf_map__fd(skel->maps.owner_stacks); 672 stat_fd = bpf_map__fd(skel->maps.owner_stat); 673 if (!stacks_fd || !stat_fd) 674 goto out_err; 675 676 stack_trace = zalloc(stack_size); 677 if (stack_trace == NULL) 678 goto out_err; 679 680 if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace)) 681 goto out_err; 682 683 bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id); 684 ckey.stack_id = stack_id; 685 bpf_map_lookup_elem(stat_fd, &ckey, &cdata); 686 687 st = zalloc(sizeof(struct lock_stat)); 688 if (!st) 689 goto out_err; 690 691 st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) : 692 "unknown"); 693 if (!st->name) 694 goto out_err; 695 696 st->flags = cdata.flags; 697 st->nr_contended = cdata.count; 698 st->wait_time_total = cdata.total_time; 699 st->wait_time_max = cdata.max_time; 700 st->wait_time_min = cdata.min_time; 701 st->callstack = stack_trace; 702 703 if (cdata.count) 704 st->avg_wait_time = cdata.total_time / cdata.count; 705 706 bpf_map_delete_elem(stacks_fd, stack_trace); 707 bpf_map_delete_elem(stat_fd, &ckey); 708 709 return st; 710 711 out_err: 712 free(stack_trace); 713 free(st); 714 715 return NULL; 716 } 717 718 int lock_contention_read(struct lock_contention *con) 719 { 720 int fd, stack, err = 0; 721 struct contention_key *prev_key, key = {}; 722 struct contention_data data = {}; 723 struct lock_stat *st = NULL; 724 struct machine *machine = con->machine; 725 u64 *stack_trace; 726 size_t stack_size = con->max_stack * sizeof(*stack_trace); 727 728 fd = bpf_map__fd(skel->maps.lock_stat); 729 stack = bpf_map__fd(skel->maps.stacks); 730 731 con->fails.task = skel->bss->task_fail; 732 con->fails.stack = skel->bss->stack_fail; 733 con->fails.time = skel->bss->time_fail; 734 con->fails.data = skel->bss->data_fail; 735 736 stack_trace = zalloc(stack_size); 737 if (stack_trace == NULL) 738 return -1; 739 740 account_end_timestamp(con); 741 742 if (con->aggr_mode == LOCK_AGGR_TASK) { 743 struct thread *idle = machine__findnew_thread(machine, 744 /*pid=*/0, 745 /*tid=*/0); 746 thread__set_comm(idle, "swapper", /*timestamp=*/0); 747 } 748 749 if (con->aggr_mode == LOCK_AGGR_ADDR) { 750 DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 751 .flags = BPF_F_TEST_RUN_ON_CPU, 752 ); 753 int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms); 754 755 bpf_prog_test_run_opts(prog_fd, &opts); 756 } 757 758 prev_key = NULL; 759 while (!bpf_map_get_next_key(fd, prev_key, &key)) { 760 s64 ls_key; 761 const char *name; 762 763 /* to handle errors in the loop body */ 764 err = -1; 765 766 bpf_map_lookup_elem(fd, &key, &data); 767 if (con->save_callstack) { 768 bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); 769 770 if (!match_callstack_filter(machine, stack_trace, con->max_stack)) { 771 con->nr_filtered += data.count; 772 goto next; 773 } 774 } 775 776 switch (con->aggr_mode) { 777 case LOCK_AGGR_CALLER: 778 ls_key = key.stack_id; 779 break; 780 case LOCK_AGGR_TASK: 781 ls_key = key.pid; 782 break; 783 case LOCK_AGGR_ADDR: 784 case LOCK_AGGR_CGROUP: 785 ls_key = key.lock_addr_or_cgroup; 786 break; 787 default: 788 goto next; 789 } 790 791 st = lock_stat_find(ls_key); 792 if (st != NULL) { 793 st->wait_time_total += data.total_time; 794 if (st->wait_time_max < data.max_time) 795 st->wait_time_max = data.max_time; 796 if (st->wait_time_min > data.min_time) 797 st->wait_time_min = data.min_time; 798 799 st->nr_contended += data.count; 800 if (st->nr_contended) 801 st->avg_wait_time = st->wait_time_total / st->nr_contended; 802 goto next; 803 } 804 805 name = lock_contention_get_name(con, &key, stack_trace, data.flags); 806 st = lock_stat_findnew(ls_key, name, data.flags); 807 if (st == NULL) 808 break; 809 810 st->nr_contended = data.count; 811 st->wait_time_total = data.total_time; 812 st->wait_time_max = data.max_time; 813 st->wait_time_min = data.min_time; 814 815 if (data.count) 816 st->avg_wait_time = data.total_time / data.count; 817 818 if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 819 st->callstack = memdup(stack_trace, stack_size); 820 if (st->callstack == NULL) 821 break; 822 } 823 824 next: 825 prev_key = &key; 826 827 /* we're fine now, reset the error */ 828 err = 0; 829 } 830 831 free(stack_trace); 832 833 return err; 834 } 835 836 int lock_contention_finish(struct lock_contention *con) 837 { 838 if (skel) { 839 skel->bss->enabled = 0; 840 lock_contention_bpf__destroy(skel); 841 } 842 843 while (!RB_EMPTY_ROOT(&con->cgroups)) { 844 struct rb_node *node = rb_first(&con->cgroups); 845 struct cgroup *cgrp = rb_entry(node, struct cgroup, node); 846 847 rb_erase(node, &con->cgroups); 848 cgroup__put(cgrp); 849 } 850 851 exit_slab_cache_iter(); 852 btf__free(con->btf); 853 854 return 0; 855 } 856