1 // SPDX-License-Identifier: GPL-2.0 2 #include "util/cgroup.h" 3 #include "util/debug.h" 4 #include "util/evlist.h" 5 #include "util/hashmap.h" 6 #include "util/machine.h" 7 #include "util/map.h" 8 #include "util/symbol.h" 9 #include "util/target.h" 10 #include "util/thread.h" 11 #include "util/thread_map.h" 12 #include "util/lock-contention.h" 13 #include <linux/zalloc.h> 14 #include <linux/string.h> 15 #include <api/fs/fs.h> 16 #include <bpf/bpf.h> 17 #include <bpf/btf.h> 18 #include <inttypes.h> 19 20 #include "bpf_skel/lock_contention.skel.h" 21 #include "bpf_skel/lock_data.h" 22 23 static struct lock_contention_bpf *skel; 24 static bool has_slab_iter; 25 static struct hashmap slab_hash; 26 27 static size_t slab_cache_hash(long key, void *ctx __maybe_unused) 28 { 29 return key; 30 } 31 32 static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) 33 { 34 return key1 == key2; 35 } 36 37 static void check_slab_cache_iter(struct lock_contention *con) 38 { 39 s32 ret; 40 41 hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); 42 43 con->btf = btf__load_vmlinux_btf(); 44 if (con->btf == NULL) { 45 pr_debug("BTF loading failed: %m\n"); 46 return; 47 } 48 49 ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); 50 if (ret < 0) { 51 bpf_program__set_autoload(skel->progs.slab_cache_iter, false); 52 pr_debug("slab cache iterator is not available: %d\n", ret); 53 return; 54 } 55 56 has_slab_iter = true; 57 58 bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); 59 } 60 61 static void run_slab_cache_iter(void) 62 { 63 int fd; 64 char buf[256]; 65 long key, *prev_key; 66 67 if (!has_slab_iter) 68 return; 69 70 fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter)); 71 if (fd < 0) { 72 pr_debug("cannot create slab cache iter: %d\n", fd); 73 return; 74 } 75 76 /* This will run the bpf program */ 77 while (read(fd, buf, sizeof(buf)) > 0) 78 continue; 79 80 close(fd); 81 82 /* Read the slab cache map and build a hash with IDs */ 83 fd = bpf_map__fd(skel->maps.slab_caches); 84 prev_key = NULL; 85 while (!bpf_map_get_next_key(fd, prev_key, &key)) { 86 struct slab_cache_data *data; 87 88 data = malloc(sizeof(*data)); 89 if (data == NULL) 90 break; 91 92 if (bpf_map_lookup_elem(fd, &key, data) < 0) 93 break; 94 95 hashmap__add(&slab_hash, data->id, data); 96 prev_key = &key; 97 } 98 } 99 100 static void exit_slab_cache_iter(void) 101 { 102 struct hashmap_entry *cur; 103 unsigned bkt; 104 105 hashmap__for_each_entry(&slab_hash, cur, bkt) 106 free(cur->pvalue); 107 108 hashmap__clear(&slab_hash); 109 } 110 111 static void init_numa_data(struct lock_contention *con) 112 { 113 struct symbol *sym; 114 struct map *kmap; 115 char *buf = NULL, *p; 116 size_t len; 117 long last = -1; 118 int ret; 119 120 if (!con->btf) 121 return; 122 123 /* 124 * 'struct zone' is embedded in 'struct pglist_data' as an array. 125 * As we may not have full information of the struct zone in the 126 * (fake) vmlinux.h, let's get the actual size from BTF. 127 */ 128 ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); 129 if (ret < 0) { 130 pr_debug("cannot get type of struct zone: %d\n", ret); 131 return; 132 } 133 134 ret = btf__resolve_size(con->btf, ret); 135 if (ret < 0) { 136 pr_debug("cannot get size of struct zone: %d\n", ret); 137 return; 138 } 139 skel->rodata->sizeof_zone = ret; 140 141 /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */ 142 sym = machine__find_kernel_symbol_by_name(con->machine, 143 "contig_page_data", 144 &kmap); 145 if (sym) { 146 skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); 147 map__put(kmap); 148 return; 149 } 150 151 /* 152 * The 'node_data' is an array of pointers to struct pglist_data. 153 * It needs to follow the pointer for each node in BPF to get the 154 * address of struct pglist_data and its zones. 155 */ 156 sym = machine__find_kernel_symbol_by_name(con->machine, 157 "node_data", 158 &kmap); 159 if (sym == NULL) 160 return; 161 162 skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); 163 map__put(kmap); 164 165 /* get the number of online nodes using the last node number + 1 */ 166 ret = sysfs__read_str("devices/system/node/online", &buf, &len); 167 if (ret < 0) { 168 pr_debug("failed to read online node: %d\n", ret); 169 return; 170 } 171 172 p = buf; 173 while (p && *p) { 174 last = strtol(p, &p, 0); 175 176 if (p && (*p == ',' || *p == '-' || *p == '\n')) 177 p++; 178 } 179 skel->rodata->nr_nodes = last + 1; 180 free(buf); 181 } 182 183 int lock_contention_prepare(struct lock_contention *con) 184 { 185 int i, fd; 186 int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1; 187 struct evlist *evlist = con->evlist; 188 struct target *target = con->target; 189 bool has_mmap_lock = false; 190 191 /* make sure it loads the kernel map before lookup */ 192 map__load(machine__kernel_map(con->machine)); 193 194 skel = lock_contention_bpf__open(); 195 if (!skel) { 196 pr_err("Failed to open lock-contention BPF skeleton\n"); 197 return -1; 198 } 199 200 bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64)); 201 bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); 202 bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); 203 204 if (con->aggr_mode == LOCK_AGGR_TASK) 205 bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries); 206 else 207 bpf_map__set_max_entries(skel->maps.task_data, 1); 208 209 if (con->save_callstack) { 210 bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); 211 if (con->owner) { 212 bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64)); 213 bpf_map__set_key_size(skel->maps.owner_stacks, 214 con->max_stack * sizeof(u64)); 215 bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries); 216 bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries); 217 bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries); 218 skel->rodata->max_stack = con->max_stack; 219 } 220 } else { 221 bpf_map__set_max_entries(skel->maps.stacks, 1); 222 } 223 224 if (target__has_cpu(target)) { 225 skel->rodata->has_cpu = 1; 226 ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); 227 } 228 if (target__has_task(target)) { 229 skel->rodata->has_task = 1; 230 ntasks = perf_thread_map__nr(evlist->core.threads); 231 } 232 if (con->filters->nr_types) { 233 skel->rodata->has_type = 1; 234 ntypes = con->filters->nr_types; 235 } 236 if (con->filters->nr_cgrps) { 237 skel->rodata->has_cgroup = 1; 238 ncgrps = con->filters->nr_cgrps; 239 } 240 241 /* resolve lock name filters to addr */ 242 if (con->filters->nr_syms) { 243 struct symbol *sym; 244 struct map *kmap; 245 unsigned long *addrs; 246 247 for (i = 0; i < con->filters->nr_syms; i++) { 248 if (!strcmp(con->filters->syms[i], "mmap_lock")) { 249 has_mmap_lock = true; 250 continue; 251 } 252 253 sym = machine__find_kernel_symbol_by_name(con->machine, 254 con->filters->syms[i], 255 &kmap); 256 if (sym == NULL) { 257 pr_warning("ignore unknown symbol: %s\n", 258 con->filters->syms[i]); 259 continue; 260 } 261 262 addrs = realloc(con->filters->addrs, 263 (con->filters->nr_addrs + 1) * sizeof(*addrs)); 264 if (addrs == NULL) { 265 pr_warning("memory allocation failure\n"); 266 continue; 267 } 268 269 addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start); 270 con->filters->addrs = addrs; 271 } 272 naddrs = con->filters->nr_addrs ?: has_mmap_lock; 273 skel->rodata->has_addr = 1; 274 } 275 276 /* resolve lock name in delays */ 277 if (con->nr_delays) { 278 struct symbol *sym; 279 struct map *kmap; 280 281 for (i = 0; i < con->nr_delays; i++) { 282 sym = machine__find_kernel_symbol_by_name(con->machine, 283 con->delays[i].sym, 284 &kmap); 285 if (sym == NULL) { 286 pr_warning("ignore unknown symbol: %s\n", 287 con->delays[i].sym); 288 continue; 289 } 290 291 con->delays[i].addr = map__unmap_ip(kmap, sym->start); 292 } 293 skel->rodata->lock_delay = 1; 294 bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); 295 } 296 297 bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); 298 bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 299 bpf_map__set_max_entries(skel->maps.type_filter, ntypes); 300 bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); 301 bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); 302 303 skel->rodata->stack_skip = con->stack_skip; 304 skel->rodata->aggr_mode = con->aggr_mode; 305 skel->rodata->needs_callstack = con->save_callstack; 306 skel->rodata->lock_owner = con->owner; 307 skel->rodata->has_mmap_lock = has_mmap_lock; 308 309 if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) { 310 if (cgroup_is_v2("perf_event")) 311 skel->rodata->use_cgroup_v2 = 1; 312 } 313 314 check_slab_cache_iter(con); 315 316 if (con->filters->nr_slabs && has_slab_iter) { 317 skel->rodata->has_slab = 1; 318 nslabs = con->filters->nr_slabs; 319 } 320 321 bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); 322 323 init_numa_data(con); 324 325 if (lock_contention_bpf__load(skel) < 0) { 326 pr_err("Failed to load lock-contention BPF skeleton\n"); 327 return -1; 328 } 329 330 if (target__has_cpu(target)) { 331 u32 cpu; 332 u8 val = 1; 333 334 fd = bpf_map__fd(skel->maps.cpu_filter); 335 336 for (i = 0; i < ncpus; i++) { 337 cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu; 338 bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); 339 } 340 } 341 342 if (target__has_task(target)) { 343 u32 pid; 344 u8 val = 1; 345 346 fd = bpf_map__fd(skel->maps.task_filter); 347 348 for (i = 0; i < ntasks; i++) { 349 pid = perf_thread_map__pid(evlist->core.threads, i); 350 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 351 } 352 } 353 354 if (target__none(target) && evlist->workload.pid > 0) { 355 u32 pid = evlist->workload.pid; 356 u8 val = 1; 357 358 fd = bpf_map__fd(skel->maps.task_filter); 359 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 360 } 361 362 if (con->filters->nr_types) { 363 u8 val = 1; 364 365 fd = bpf_map__fd(skel->maps.type_filter); 366 367 for (i = 0; i < con->filters->nr_types; i++) 368 bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY); 369 } 370 371 if (con->filters->nr_addrs) { 372 u8 val = 1; 373 374 fd = bpf_map__fd(skel->maps.addr_filter); 375 376 for (i = 0; i < con->filters->nr_addrs; i++) 377 bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY); 378 } 379 380 if (con->filters->nr_cgrps) { 381 u8 val = 1; 382 383 fd = bpf_map__fd(skel->maps.cgroup_filter); 384 385 for (i = 0; i < con->filters->nr_cgrps; i++) 386 bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); 387 } 388 389 if (con->nr_delays) { 390 fd = bpf_map__fd(skel->maps.lock_delays); 391 392 for (i = 0; i < con->nr_delays; i++) 393 bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); 394 } 395 396 if (con->aggr_mode == LOCK_AGGR_CGROUP) 397 read_all_cgroups(&con->cgroups); 398 399 bpf_program__set_autoload(skel->progs.collect_lock_syms, false); 400 401 lock_contention_bpf__attach(skel); 402 403 /* run the slab iterator after attaching */ 404 run_slab_cache_iter(); 405 406 if (con->filters->nr_slabs) { 407 u8 val = 1; 408 int cache_fd; 409 long key, *prev_key; 410 411 fd = bpf_map__fd(skel->maps.slab_filter); 412 413 /* Read the slab cache map and build a hash with its address */ 414 cache_fd = bpf_map__fd(skel->maps.slab_caches); 415 prev_key = NULL; 416 while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) { 417 struct slab_cache_data data; 418 419 if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0) 420 break; 421 422 for (i = 0; i < con->filters->nr_slabs; i++) { 423 if (!strcmp(con->filters->slabs[i], data.name)) { 424 bpf_map_update_elem(fd, &key, &val, BPF_ANY); 425 break; 426 } 427 } 428 prev_key = &key; 429 } 430 } 431 432 return 0; 433 } 434 435 /* 436 * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end 437 * timestamp in ktime so that it can calculate delta easily. 438 */ 439 static void mark_end_timestamp(void) 440 { 441 DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 442 .flags = BPF_F_TEST_RUN_ON_CPU, 443 ); 444 int prog_fd = bpf_program__fd(skel->progs.end_timestamp); 445 446 bpf_prog_test_run_opts(prog_fd, &opts); 447 } 448 449 static void update_lock_stat(int map_fd, int pid, u64 end_ts, 450 enum lock_aggr_mode aggr_mode, 451 struct tstamp_data *ts_data) 452 { 453 u64 delta; 454 struct contention_key stat_key = {}; 455 struct contention_data stat_data; 456 457 if (ts_data->timestamp >= end_ts) 458 return; 459 460 delta = end_ts - ts_data->timestamp; 461 462 switch (aggr_mode) { 463 case LOCK_AGGR_CALLER: 464 stat_key.stack_id = ts_data->stack_id; 465 break; 466 case LOCK_AGGR_TASK: 467 stat_key.pid = pid; 468 break; 469 case LOCK_AGGR_ADDR: 470 stat_key.lock_addr_or_cgroup = ts_data->lock; 471 break; 472 case LOCK_AGGR_CGROUP: 473 stat_key.lock_addr_or_cgroup = ts_data->cgroup_id; 474 break; 475 default: 476 return; 477 } 478 479 if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0) 480 return; 481 482 stat_data.total_time += delta; 483 stat_data.count++; 484 485 if (delta > stat_data.max_time) 486 stat_data.max_time = delta; 487 if (delta < stat_data.min_time) 488 stat_data.min_time = delta; 489 490 bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST); 491 } 492 493 /* 494 * Account entries in the tstamp map (which didn't see the corresponding 495 * lock:contention_end tracepoint) using end_ts. 496 */ 497 static void account_end_timestamp(struct lock_contention *con) 498 { 499 int ts_fd, stat_fd; 500 int *prev_key, key; 501 u64 end_ts = skel->bss->end_ts; 502 int total_cpus; 503 enum lock_aggr_mode aggr_mode = con->aggr_mode; 504 struct tstamp_data ts_data, *cpu_data; 505 506 /* Iterate per-task tstamp map (key = TID) */ 507 ts_fd = bpf_map__fd(skel->maps.tstamp); 508 stat_fd = bpf_map__fd(skel->maps.lock_stat); 509 510 prev_key = NULL; 511 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { 512 if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) { 513 int pid = key; 514 515 if (aggr_mode == LOCK_AGGR_TASK && con->owner) 516 pid = ts_data.flags; 517 518 update_lock_stat(stat_fd, pid, end_ts, aggr_mode, 519 &ts_data); 520 } 521 522 prev_key = &key; 523 } 524 525 /* Now it'll check per-cpu tstamp map which doesn't have TID. */ 526 if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP) 527 return; 528 529 total_cpus = cpu__max_cpu().cpu; 530 ts_fd = bpf_map__fd(skel->maps.tstamp_cpu); 531 532 cpu_data = calloc(total_cpus, sizeof(*cpu_data)); 533 if (cpu_data == NULL) 534 return; 535 536 prev_key = NULL; 537 while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { 538 if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0) 539 goto next; 540 541 for (int i = 0; i < total_cpus; i++) { 542 if (cpu_data[i].lock == 0) 543 continue; 544 545 update_lock_stat(stat_fd, -1, end_ts, aggr_mode, 546 &cpu_data[i]); 547 } 548 549 next: 550 prev_key = &key; 551 } 552 free(cpu_data); 553 } 554 555 int lock_contention_start(void) 556 { 557 skel->bss->enabled = 1; 558 return 0; 559 } 560 561 int lock_contention_stop(void) 562 { 563 skel->bss->enabled = 0; 564 mark_end_timestamp(); 565 return 0; 566 } 567 568 static const char *lock_contention_get_name(struct lock_contention *con, 569 struct contention_key *key, 570 u64 *stack_trace, u32 flags) 571 { 572 int idx = 0; 573 u64 addr; 574 static char name_buf[KSYM_NAME_LEN]; 575 struct symbol *sym; 576 struct map *kmap; 577 struct machine *machine = con->machine; 578 579 if (con->aggr_mode == LOCK_AGGR_TASK) { 580 struct contention_task_data task; 581 int pid = key->pid; 582 int task_fd = bpf_map__fd(skel->maps.task_data); 583 584 /* do not update idle comm which contains CPU number */ 585 if (pid) { 586 struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid); 587 588 if (t != NULL && 589 !bpf_map_lookup_elem(task_fd, &pid, &task) && 590 thread__set_comm(t, task.comm, /*timestamp=*/0)) { 591 snprintf(name_buf, sizeof(name_buf), "%s", task.comm); 592 return name_buf; 593 } 594 } 595 return ""; 596 } 597 598 if (con->aggr_mode == LOCK_AGGR_ADDR) { 599 int lock_fd = bpf_map__fd(skel->maps.lock_syms); 600 struct slab_cache_data *slab_data; 601 602 /* per-process locks set upper bits of the flags */ 603 if (flags & LCD_F_MMAP_LOCK) 604 return "mmap_lock"; 605 if (flags & LCD_F_SIGHAND_LOCK) 606 return "siglock"; 607 608 /* global locks with symbols */ 609 sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap); 610 if (sym) 611 return sym->name; 612 613 /* try semi-global locks collected separately */ 614 if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { 615 if (flags == LOCK_CLASS_RQLOCK) 616 return "rq_lock"; 617 } 618 619 if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { 620 if (flags == LOCK_CLASS_ZONE_LOCK) 621 return "zone_lock"; 622 } 623 624 /* look slab_hash for dynamic locks in a slab object */ 625 if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { 626 snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); 627 return name_buf; 628 } 629 630 return ""; 631 } 632 633 if (con->aggr_mode == LOCK_AGGR_CGROUP) { 634 u64 cgrp_id = key->lock_addr_or_cgroup; 635 struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id); 636 637 if (cgrp) 638 return cgrp->name; 639 640 snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id); 641 return name_buf; 642 } 643 644 /* LOCK_AGGR_CALLER: skip lock internal functions */ 645 while (machine__is_lock_function(machine, stack_trace[idx]) && 646 idx < con->max_stack - 1) 647 idx++; 648 649 addr = stack_trace[idx]; 650 sym = machine__find_kernel_symbol(machine, addr, &kmap); 651 652 if (sym) { 653 unsigned long offset; 654 655 offset = map__map_ip(kmap, addr) - sym->start; 656 657 if (offset == 0) 658 return sym->name; 659 660 snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset); 661 } else { 662 snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr); 663 } 664 665 return name_buf; 666 } 667 668 struct lock_stat *pop_owner_stack_trace(struct lock_contention *con) 669 { 670 int stacks_fd, stat_fd; 671 u64 *stack_trace = NULL; 672 s32 stack_id; 673 struct contention_key ckey = {}; 674 struct contention_data cdata = {}; 675 size_t stack_size = con->max_stack * sizeof(*stack_trace); 676 struct lock_stat *st = NULL; 677 678 stacks_fd = bpf_map__fd(skel->maps.owner_stacks); 679 stat_fd = bpf_map__fd(skel->maps.owner_stat); 680 if (!stacks_fd || !stat_fd) 681 goto out_err; 682 683 stack_trace = zalloc(stack_size); 684 if (stack_trace == NULL) 685 goto out_err; 686 687 if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace)) 688 goto out_err; 689 690 bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id); 691 ckey.stack_id = stack_id; 692 bpf_map_lookup_elem(stat_fd, &ckey, &cdata); 693 694 st = zalloc(sizeof(struct lock_stat)); 695 if (!st) 696 goto out_err; 697 698 st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) : 699 "unknown"); 700 if (!st->name) 701 goto out_err; 702 703 st->flags = cdata.flags; 704 st->nr_contended = cdata.count; 705 st->wait_time_total = cdata.total_time; 706 st->wait_time_max = cdata.max_time; 707 st->wait_time_min = cdata.min_time; 708 st->callstack = stack_trace; 709 710 if (cdata.count) 711 st->avg_wait_time = cdata.total_time / cdata.count; 712 713 bpf_map_delete_elem(stacks_fd, stack_trace); 714 bpf_map_delete_elem(stat_fd, &ckey); 715 716 return st; 717 718 out_err: 719 free(stack_trace); 720 free(st); 721 722 return NULL; 723 } 724 725 int lock_contention_read(struct lock_contention *con) 726 { 727 int fd, stack, err = 0; 728 struct contention_key *prev_key, key = {}; 729 struct contention_data data = {}; 730 struct lock_stat *st = NULL; 731 struct machine *machine = con->machine; 732 u64 *stack_trace; 733 size_t stack_size = con->max_stack * sizeof(*stack_trace); 734 735 fd = bpf_map__fd(skel->maps.lock_stat); 736 stack = bpf_map__fd(skel->maps.stacks); 737 738 con->fails.task = skel->bss->task_fail; 739 con->fails.stack = skel->bss->stack_fail; 740 con->fails.time = skel->bss->time_fail; 741 con->fails.data = skel->bss->data_fail; 742 743 stack_trace = zalloc(stack_size); 744 if (stack_trace == NULL) 745 return -1; 746 747 account_end_timestamp(con); 748 749 if (con->aggr_mode == LOCK_AGGR_TASK) { 750 struct thread *idle = machine__findnew_thread(machine, 751 /*pid=*/0, 752 /*tid=*/0); 753 thread__set_comm(idle, "swapper", /*timestamp=*/0); 754 } 755 756 if (con->aggr_mode == LOCK_AGGR_ADDR) { 757 DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 758 .flags = BPF_F_TEST_RUN_ON_CPU, 759 ); 760 int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms); 761 762 bpf_prog_test_run_opts(prog_fd, &opts); 763 } 764 765 prev_key = NULL; 766 while (!bpf_map_get_next_key(fd, prev_key, &key)) { 767 s64 ls_key; 768 const char *name; 769 770 /* to handle errors in the loop body */ 771 err = -1; 772 773 bpf_map_lookup_elem(fd, &key, &data); 774 if (con->save_callstack) { 775 bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); 776 777 if (!match_callstack_filter(machine, stack_trace, con->max_stack)) { 778 con->nr_filtered += data.count; 779 goto next; 780 } 781 } 782 783 switch (con->aggr_mode) { 784 case LOCK_AGGR_CALLER: 785 ls_key = key.stack_id; 786 break; 787 case LOCK_AGGR_TASK: 788 ls_key = key.pid; 789 break; 790 case LOCK_AGGR_ADDR: 791 case LOCK_AGGR_CGROUP: 792 ls_key = key.lock_addr_or_cgroup; 793 break; 794 default: 795 goto next; 796 } 797 798 st = lock_stat_find(ls_key); 799 if (st != NULL) { 800 st->wait_time_total += data.total_time; 801 if (st->wait_time_max < data.max_time) 802 st->wait_time_max = data.max_time; 803 if (st->wait_time_min > data.min_time) 804 st->wait_time_min = data.min_time; 805 806 st->nr_contended += data.count; 807 if (st->nr_contended) 808 st->avg_wait_time = st->wait_time_total / st->nr_contended; 809 goto next; 810 } 811 812 name = lock_contention_get_name(con, &key, stack_trace, data.flags); 813 st = lock_stat_findnew(ls_key, name, data.flags); 814 if (st == NULL) 815 break; 816 817 st->nr_contended = data.count; 818 st->wait_time_total = data.total_time; 819 st->wait_time_max = data.max_time; 820 st->wait_time_min = data.min_time; 821 822 if (data.count) 823 st->avg_wait_time = data.total_time / data.count; 824 825 if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 826 st->callstack = memdup(stack_trace, stack_size); 827 if (st->callstack == NULL) 828 break; 829 } 830 831 next: 832 prev_key = &key; 833 834 /* we're fine now, reset the error */ 835 err = 0; 836 } 837 838 free(stack_trace); 839 840 return err; 841 } 842 843 int lock_contention_finish(struct lock_contention *con) 844 { 845 if (skel) { 846 skel->bss->enabled = 0; 847 lock_contention_bpf__destroy(skel); 848 } 849 850 while (!RB_EMPTY_ROOT(&con->cgroups)) { 851 struct rb_node *node = rb_first(&con->cgroups); 852 struct cgroup *cgrp = rb_entry(node, struct cgroup, node); 853 854 rb_erase(node, &con->cgroups); 855 cgroup__put(cgrp); 856 } 857 858 exit_slab_cache_iter(); 859 btf__free(con->btf); 860 861 return 0; 862 } 863