// SPDX-License-Identifier: GPL-2.0
#include "util/cgroup.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/hashmap.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/symbol.h"
#include "util/target.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
#include <linux/string.h>
#include <api/fs/fs.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <inttypes.h>

#include "bpf_skel/lock_contention.skel.h"
#include "bpf_skel/lock_data.h"

static struct lock_contention_bpf *skel;
static bool has_slab_iter;
static struct hashmap slab_hash;

static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
{
	return key;
}

static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return key1 == key2;
}

static void check_slab_cache_iter(struct lock_contention *con)
{
	s32 ret;

	hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);

	con->btf = btf__load_vmlinux_btf();
	if (con->btf == NULL) {
		pr_debug("BTF loading failed: %s\n", strerror(errno));
		return;
	}

	ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
	if (ret < 0) {
		bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
		pr_debug("slab cache iterator is not available: %d\n", ret);
		return;
	}

	has_slab_iter = true;

	bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
}

static void run_slab_cache_iter(void)
{
	int fd;
	char buf[256];
	long key, *prev_key;

	if (!has_slab_iter)
		return;

	fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
	if (fd < 0) {
		pr_debug("cannot create slab cache iter: %d\n", fd);
		return;
	}

	/* This will run the bpf program */
	while (read(fd, buf, sizeof(buf)) > 0)
		continue;

	close(fd);

	/* Read the slab cache map and build a hash with IDs */
	fd = bpf_map__fd(skel->maps.slab_caches);
	prev_key = NULL;
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		struct slab_cache_data *data;

		data = malloc(sizeof(*data));
		if (data == NULL)
			break;

		if (bpf_map_lookup_elem(fd, &key, data) < 0)
			break;

		hashmap__add(&slab_hash, data->id, data);
		prev_key = &key;
	}
}

static void exit_slab_cache_iter(void)
{
	struct hashmap_entry *cur;
	unsigned bkt;

	hashmap__for_each_entry(&slab_hash, cur, bkt)
		free(cur->pvalue);

	hashmap__clear(&slab_hash);
}

static void init_numa_data(struct lock_contention *con)
{
	struct symbol *sym;
	struct map *kmap;
	char *buf = NULL, *p;
	size_t len;
	long last = -1;
	int ret;

	/*
	 * 'struct zone' is embedded in 'struct pglist_data' as an array.
	 * As we may not have full information of the struct zone in the
	 * (fake) vmlinux.h, let's get the actual size from BTF.
	 */
	ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT);
	if (ret < 0) {
		pr_debug("cannot get type of struct zone: %d\n", ret);
		return;
	}

	ret = btf__resolve_size(con->btf, ret);
	if (ret < 0) {
		pr_debug("cannot get size of struct zone: %d\n", ret);
		return;
	}
	skel->rodata->sizeof_zone = ret;

	/* UMA system doesn't have 'node_data[]' - just use contig_page_data. */
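	/* map__unmap_ip() converts the map-relative symbol address back to a kernel virtual address */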
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "contig_page_data",
						  &kmap);
	if (sym) {
		skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start);
		map__put(kmap);
		return;
	}

	/*
	 * The 'node_data' is an array of pointers to struct pglist_data.
	 * It needs to follow the pointer for each node in BPF to get the
	 * address of struct pglist_data and its zones.
	 */
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "node_data",
						  &kmap);
	if (sym == NULL)
		return;

	skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start);
	map__put(kmap);

	/* get the number of online nodes using the last node number + 1 */
	ret = sysfs__read_str("devices/system/node/online", &buf, &len);
	if (ret < 0) {
		pr_debug("failed to read online node: %d\n", ret);
		return;
	}

	p = buf;
	while (p && *p) {
		last = strtol(p, &p, 0);

		if (p && (*p == ',' || *p == '-' || *p == '\n'))
			p++;
	}
	skel->rodata->nr_nodes = last + 1;
	free(buf);
}

int lock_contention_prepare(struct lock_contention *con)
{
	int i, fd;
	int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
	struct evlist *evlist = con->evlist;
	struct target *target = con->target;

	/* make sure it loads the kernel map before lookup */
	map__load(machine__kernel_map(con->machine));

	skel = lock_contention_bpf__open();
	if (!skel) {
		pr_err("Failed to open lock-contention BPF skeleton\n");
		return -1;
	}

	bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
	bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
	bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);

	if (con->aggr_mode == LOCK_AGGR_TASK)
		bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
	else
		bpf_map__set_max_entries(skel->maps.task_data, 1);

	if (con->save_callstack) {
		bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
		if (con->owner) {
			bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
			bpf_map__set_key_size(skel->maps.owner_stacks,
					      con->max_stack * sizeof(u64));
			bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
			bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
			bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
			skel->rodata->max_stack = con->max_stack;
		}
	} else {
		bpf_map__set_max_entries(skel->maps.stacks, 1);
	}

	if (target__has_cpu(target)) {
		skel->rodata->has_cpu = 1;
		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
	}
	if (target__has_task(target)) {
		skel->rodata->has_task = 1;
		ntasks = perf_thread_map__nr(evlist->core.threads);
	}
	if (con->filters->nr_types) {
		skel->rodata->has_type = 1;
		ntypes = con->filters->nr_types;
	}
	if (con->filters->nr_cgrps) {
		skel->rodata->has_cgroup = 1;
		ncgrps = con->filters->nr_cgrps;
	}

	/* resolve lock name filters to addr */
	if (con->filters->nr_syms) {
		struct symbol *sym;
		struct map *kmap;
		unsigned long *addrs;

		for (i = 0; i < con->filters->nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(con->machine,
								  con->filters->syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   con->filters->syms[i]);
				continue;
			}
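			/* grow the address filter array and append the resolved kernel address */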
			addrs = realloc(con->filters->addrs,
					(con->filters->nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				continue;
			}

			addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start);
			con->filters->addrs = addrs;
		}
		naddrs = con->filters->nr_addrs;
		skel->rodata->has_addr = 1;
	}

	/* resolve lock name in delays */
	if (con->nr_delays) {
		struct symbol *sym;
		struct map *kmap;

		for (i = 0; i < con->nr_delays; i++) {
			sym = machine__find_kernel_symbol_by_name(con->machine,
								  con->delays[i].sym,
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   con->delays[i].sym);
				continue;
			}

			con->delays[i].addr = map__unmap_ip(kmap, sym->start);
		}
		skel->rodata->lock_delay = 1;
		bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays);
	}

	bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
	bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
	bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
	bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
	bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);

	skel->rodata->stack_skip = con->stack_skip;
	skel->rodata->aggr_mode = con->aggr_mode;
	skel->rodata->needs_callstack = con->save_callstack;
	skel->rodata->lock_owner = con->owner;

	if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
		if (cgroup_is_v2("perf_event"))
			skel->rodata->use_cgroup_v2 = 1;
	}

	check_slab_cache_iter(con);

	if (con->filters->nr_slabs && has_slab_iter) {
		skel->rodata->has_slab = 1;
		nslabs = con->filters->nr_slabs;
	}

	bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);

	init_numa_data(con);

	if (lock_contention_bpf__load(skel) < 0) {
		pr_err("Failed to load lock-contention BPF skeleton\n");
		return -1;
	}

	if (target__has_cpu(target)) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	if (target__has_task(target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	if (target__none(target) && evlist->workload.pid > 0) {
		u32 pid = evlist->workload.pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);
		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
	}

	if (con->filters->nr_types) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.type_filter);

		for (i = 0; i < con->filters->nr_types; i++)
			bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY);
	}

	if (con->filters->nr_addrs) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.addr_filter);

		for (i = 0; i < con->filters->nr_addrs; i++)
			bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY);
	}

	if (con->filters->nr_cgrps) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cgroup_filter);

		for (i = 0; i < con->filters->nr_cgrps; i++)
			bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
	}

	if (con->nr_delays) {
		fd = bpf_map__fd(skel->maps.lock_delays);
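		/* each entry maps a resolved lock address to its requested delay time */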
		for (i = 0; i < con->nr_delays; i++)
			bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY);
	}

	if (con->aggr_mode == LOCK_AGGR_CGROUP)
		read_all_cgroups(&con->cgroups);

	bpf_program__set_autoload(skel->progs.collect_lock_syms, false);

	lock_contention_bpf__attach(skel);

	/* run the slab iterator after attaching */
	run_slab_cache_iter();

	if (con->filters->nr_slabs) {
		u8 val = 1;
		int cache_fd;
		long key, *prev_key;

		fd = bpf_map__fd(skel->maps.slab_filter);

		/* Read the slab cache map and fill the filter with matching cache addresses */
		cache_fd = bpf_map__fd(skel->maps.slab_caches);
		prev_key = NULL;
		while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
			struct slab_cache_data data;

			if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
				break;

			for (i = 0; i < con->filters->nr_slabs; i++) {
				if (!strcmp(con->filters->slabs[i], data.name)) {
					bpf_map_update_elem(fd, &key, &val, BPF_ANY);
					break;
				}
			}
			prev_key = &key;
		}
	}

	return 0;
}

/*
 * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end
 * timestamp in ktime so that it can calculate delta easily.
 */
static void mark_end_timestamp(void)
{
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
			    .flags = BPF_F_TEST_RUN_ON_CPU,
	);
	int prog_fd = bpf_program__fd(skel->progs.end_timestamp);

	bpf_prog_test_run_opts(prog_fd, &opts);
}

static void update_lock_stat(int map_fd, int pid, u64 end_ts,
			     enum lock_aggr_mode aggr_mode,
			     struct tstamp_data *ts_data)
{
	u64 delta;
	struct contention_key stat_key = {};
	struct contention_data stat_data;

	if (ts_data->timestamp >= end_ts)
		return;

	delta = end_ts - ts_data->timestamp;

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		stat_key.stack_id = ts_data->stack_id;
		break;
	case LOCK_AGGR_TASK:
		stat_key.pid = pid;
		break;
	case LOCK_AGGR_ADDR:
		stat_key.lock_addr_or_cgroup = ts_data->lock;
		break;
	case LOCK_AGGR_CGROUP:
		/* TODO */
		return;
	default:
		return;
	}

	if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0)
		return;

	stat_data.total_time += delta;
	stat_data.count++;

	if (delta > stat_data.max_time)
		stat_data.max_time = delta;
	if (delta < stat_data.min_time)
		stat_data.min_time = delta;

	bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST);
}

/*
 * Account entries in the tstamp map (which didn't see the corresponding
 * lock:contention_end tracepoint) using end_ts.
 */
static void account_end_timestamp(struct lock_contention *con)
{
	int ts_fd, stat_fd;
	int *prev_key, key;
	u64 end_ts = skel->bss->end_ts;
	int total_cpus;
	enum lock_aggr_mode aggr_mode = con->aggr_mode;
	struct tstamp_data ts_data, *cpu_data;

	/* Iterate per-task tstamp map (key = TID) */
	ts_fd = bpf_map__fd(skel->maps.tstamp);
	stat_fd = bpf_map__fd(skel->maps.lock_stat);

	prev_key = NULL;
	while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
		if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) {
			int pid = key;

			if (aggr_mode == LOCK_AGGR_TASK && con->owner)
				pid = ts_data.flags;

			update_lock_stat(stat_fd, pid, end_ts, aggr_mode,
					 &ts_data);
		}

		prev_key = &key;
	}

	/* Now it'll check per-cpu tstamp map which doesn't have TID. */
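	/* (task and cgroup aggregation need that info, so only caller/addr modes continue) */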
	if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP)
		return;

	total_cpus = cpu__max_cpu().cpu;
	ts_fd = bpf_map__fd(skel->maps.tstamp_cpu);

	cpu_data = calloc(total_cpus, sizeof(*cpu_data));
	if (cpu_data == NULL)
		return;

	prev_key = NULL;
	while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
		if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0)
			goto next;

		for (int i = 0; i < total_cpus; i++) {
			if (cpu_data[i].lock == 0)
				continue;

			update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
					 &cpu_data[i]);
		}

next:
		prev_key = &key;
	}
	free(cpu_data);
}

int lock_contention_start(void)
{
	skel->bss->enabled = 1;
	return 0;
}

int lock_contention_stop(void)
{
	skel->bss->enabled = 0;
	mark_end_timestamp();
	return 0;
}

static const char *lock_contention_get_name(struct lock_contention *con,
					    struct contention_key *key,
					    u64 *stack_trace, u32 flags)
{
	int idx = 0;
	u64 addr;
	static char name_buf[KSYM_NAME_LEN];
	struct symbol *sym;
	struct map *kmap;
	struct machine *machine = con->machine;

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		struct contention_task_data task;
		int pid = key->pid;
		int task_fd = bpf_map__fd(skel->maps.task_data);

		/* do not update idle comm which contains CPU number */
		if (pid) {
			struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);

			if (t != NULL &&
			    !bpf_map_lookup_elem(task_fd, &pid, &task) &&
			    thread__set_comm(t, task.comm, /*timestamp=*/0)) {
				snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
				return name_buf;
			}
		}
		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		int lock_fd = bpf_map__fd(skel->maps.lock_syms);
		struct slab_cache_data *slab_data;

		/* per-process locks set upper bits of the flags */
		if (flags & LCD_F_MMAP_LOCK)
			return "mmap_lock";
		if (flags & LCD_F_SIGHAND_LOCK)
			return "siglock";

		/* global locks with symbols */
		sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap);
		if (sym)
			return sym->name;

		/* try semi-global locks collected separately */
		if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
			if (flags == LOCK_CLASS_RQLOCK)
				return "rq_lock";
			if (flags == LOCK_CLASS_ZONE_LOCK)
				return "zone_lock";
		}

		/* look slab_hash for dynamic locks in a slab object */
		if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
			snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
			return name_buf;
		}

		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_CGROUP) {
		u64 cgrp_id = key->lock_addr_or_cgroup;
		struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id);

		if (cgrp)
			return cgrp->name;

		snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
		return name_buf;
	}

	/* LOCK_AGGR_CALLER: skip lock internal functions */
	while (machine__is_lock_function(machine, stack_trace[idx]) &&
	       idx < con->max_stack - 1)
		idx++;

	addr = stack_trace[idx];
	sym = machine__find_kernel_symbol(machine, addr, &kmap);

	if (sym) {
		unsigned long offset;

		offset = map__map_ip(kmap, addr) - sym->start;

		if (offset == 0)
			return sym->name;

		snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
	}

	return name_buf;
}

struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
{
	int stacks_fd, stat_fd;
	u64 *stack_trace = NULL;
	s32 stack_id;
	struct contention_key ckey = {};
	struct contention_data cdata = {};
	size_t stack_size = con->max_stack * sizeof(*stack_trace);
	struct lock_stat *st = NULL;

	stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
	stat_fd = bpf_map__fd(skel->maps.owner_stat);
	if (!stacks_fd || !stat_fd)
		goto out_err;

	stack_trace = zalloc(stack_size);
	if (stack_trace == NULL)
		goto out_err;

	if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
		goto out_err;

	bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
	ckey.stack_id = stack_id;
	bpf_map_lookup_elem(stat_fd, &ckey, &cdata);

	st = zalloc(sizeof(struct lock_stat));
	if (!st)
		goto out_err;

	st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
			  "unknown");
	if (!st->name)
		goto out_err;

	st->flags = cdata.flags;
	st->nr_contended = cdata.count;
	st->wait_time_total = cdata.total_time;
	st->wait_time_max = cdata.max_time;
	st->wait_time_min = cdata.min_time;
	st->callstack = stack_trace;

	if (cdata.count)
		st->avg_wait_time = cdata.total_time / cdata.count;

	bpf_map_delete_elem(stacks_fd, stack_trace);
	bpf_map_delete_elem(stat_fd, &ckey);

	return st;

out_err:
	free(stack_trace);
	free(st);

	return NULL;
}

int lock_contention_read(struct lock_contention *con)
{
	int fd, stack, err = 0;
	struct contention_key *prev_key, key = {};
	struct contention_data data = {};
	struct lock_stat *st = NULL;
	struct machine *machine = con->machine;
	u64 *stack_trace;
	size_t stack_size = con->max_stack * sizeof(*stack_trace);

	fd = bpf_map__fd(skel->maps.lock_stat);
	stack = bpf_map__fd(skel->maps.stacks);

	con->fails.task = skel->bss->task_fail;
	con->fails.stack = skel->bss->stack_fail;
	con->fails.time = skel->bss->time_fail;
	con->fails.data = skel->bss->data_fail;

	stack_trace = zalloc(stack_size);
	if (stack_trace == NULL)
		return -1;

	account_end_timestamp(con);

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		struct thread *idle = machine__findnew_thread(machine,
							      /*pid=*/0,
							      /*tid=*/0);
		thread__set_comm(idle, "swapper", /*timestamp=*/0);
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
				    .flags = BPF_F_TEST_RUN_ON_CPU,
		);
		int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);

		bpf_prog_test_run_opts(prog_fd, &opts);
	}

	prev_key = NULL;
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		s64 ls_key;
		const char *name;

		/* to handle errors in the loop body */
		err = -1;

		bpf_map_lookup_elem(fd, &key, &data);
		if (con->save_callstack) {
			bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);

			if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
				con->nr_filtered += data.count;
				goto next;
			}
		}

		switch (con->aggr_mode) {
		case LOCK_AGGR_CALLER:
			ls_key = key.stack_id;
			break;
		case LOCK_AGGR_TASK:
			ls_key = key.pid;
			break;
		case LOCK_AGGR_ADDR:
		case LOCK_AGGR_CGROUP:
			ls_key = key.lock_addr_or_cgroup;
			break;
		default:
			goto next;
		}

		st = lock_stat_find(ls_key);
		if (st != NULL) {
			st->wait_time_total += data.total_time;
			if (st->wait_time_max < data.max_time)
				st->wait_time_max = data.max_time;
			if (st->wait_time_min > data.min_time)
				st->wait_time_min = data.min_time;

			st->nr_contended += data.count;
			if (st->nr_contended)
				st->avg_wait_time = st->wait_time_total / st->nr_contended;
			goto next;
		}

		name = lock_contention_get_name(con, &key, stack_trace, data.flags);
		st = lock_stat_findnew(ls_key, name, data.flags);
		if (st == NULL)
			break;

		st->nr_contended = data.count;
		st->wait_time_total = data.total_time;
		st->wait_time_max = data.max_time;
		st->wait_time_min = data.min_time;

		if (data.count)
			st->avg_wait_time = data.total_time / data.count;

		if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
			st->callstack = memdup(stack_trace, stack_size);
			if (st->callstack == NULL)
				break;
		}

next:
		prev_key = &key;

		/* we're fine now, reset the error */
		err = 0;
	}

	free(stack_trace);

	return err;
}

int lock_contention_finish(struct lock_contention *con)
{
	if (skel) {
		skel->bss->enabled = 0;
		lock_contention_bpf__destroy(skel);
	}

	while (!RB_EMPTY_ROOT(&con->cgroups)) {
		struct rb_node *node = rb_first(&con->cgroups);
		struct cgroup *cgrp = rb_entry(node, struct cgroup, node);

		rb_erase(node, &con->cgroups);
		cgroup__put(cgrp);
	}

	exit_slab_cache_iter();
	btf__free(con->btf);

	return 0;
}