1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include "builtin.h" 5 #include "perf.h" 6 7 #include "util/evlist.h" // for struct evsel_str_handler 8 #include "util/evsel.h" 9 #include "util/symbol.h" 10 #include "util/thread.h" 11 #include "util/header.h" 12 #include "util/target.h" 13 #include "util/cgroup.h" 14 #include "util/callchain.h" 15 #include "util/lock-contention.h" 16 #include "util/bpf_skel/lock_data.h" 17 18 #include <subcmd/pager.h> 19 #include <subcmd/parse-options.h> 20 #include "util/trace-event.h" 21 #include "util/tracepoint.h" 22 23 #include "util/debug.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/data.h" 27 #include "util/string2.h" 28 #include "util/map.h" 29 #include "util/util.h" 30 31 #include <stdio.h> 32 #include <sys/types.h> 33 #include <sys/prctl.h> 34 #include <semaphore.h> 35 #include <math.h> 36 #include <limits.h> 37 #include <ctype.h> 38 39 #include <linux/list.h> 40 #include <linux/hash.h> 41 #include <linux/kernel.h> 42 #include <linux/zalloc.h> 43 #include <linux/err.h> 44 #include <linux/stringify.h> 45 46 static struct perf_session *session; 47 static struct target target; 48 49 /* based on kernel/lockdep.c */ 50 #define LOCKHASH_BITS 12 51 #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) 52 53 static struct hlist_head *lockhash_table; 54 55 #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) 56 #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) 57 58 static struct rb_root thread_stats; 59 60 static bool combine_locks; 61 static bool show_thread_stats; 62 static bool show_lock_addrs; 63 static bool show_lock_owner; 64 static bool show_lock_cgroups; 65 static bool use_bpf; 66 static unsigned long bpf_map_entries = MAX_ENTRIES; 67 static int max_stack_depth = CONTENTION_STACK_DEPTH; 68 static int stack_skip = CONTENTION_STACK_SKIP; 69 static int print_nr_entries = INT_MAX / 2; 70 static LIST_HEAD(callstack_filters); 71 static const char *output_name = NULL; 72 static FILE *lock_output; 73 74 struct callstack_filter { 75 struct list_head list; 76 char name[]; 77 }; 78 79 static struct lock_filter filters; 80 81 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; 82 83 static bool needs_callstack(void) 84 { 85 return !list_empty(&callstack_filters); 86 } 87 88 static struct thread_stat *thread_stat_find(u32 tid) 89 { 90 struct rb_node *node; 91 struct thread_stat *st; 92 93 node = thread_stats.rb_node; 94 while (node) { 95 st = container_of(node, struct thread_stat, rb); 96 if (st->tid == tid) 97 return st; 98 else if (tid < st->tid) 99 node = node->rb_left; 100 else 101 node = node->rb_right; 102 } 103 104 return NULL; 105 } 106 107 static void thread_stat_insert(struct thread_stat *new) 108 { 109 struct rb_node **rb = &thread_stats.rb_node; 110 struct rb_node *parent = NULL; 111 struct thread_stat *p; 112 113 while (*rb) { 114 p = container_of(*rb, struct thread_stat, rb); 115 parent = *rb; 116 117 if (new->tid < p->tid) 118 rb = &(*rb)->rb_left; 119 else if (new->tid > p->tid) 120 rb = &(*rb)->rb_right; 121 else 122 BUG_ON("inserting invalid thread_stat\n"); 123 } 124 125 rb_link_node(&new->rb, parent, rb); 126 rb_insert_color(&new->rb, &thread_stats); 127 } 128 129 static struct thread_stat *thread_stat_findnew_after_first(u32 tid) 130 { 131 struct thread_stat *st; 132 133 st = thread_stat_find(tid); 134 if (st) 135 return st; 136 137 st = zalloc(sizeof(struct thread_stat)); 138 if (!st) { 139 pr_err("memory allocation failed\n"); 140 return NULL; 141 } 142 143 st->tid = tid; 144 INIT_LIST_HEAD(&st->seq_list); 145 146 thread_stat_insert(st); 147 148 return st; 149 } 150 151 static struct thread_stat *thread_stat_findnew_first(u32 tid); 152 static struct thread_stat *(*thread_stat_findnew)(u32 tid) = 153 thread_stat_findnew_first; 154 155 static struct thread_stat *thread_stat_findnew_first(u32 tid) 156 { 157 struct thread_stat *st; 158 159 st = zalloc(sizeof(struct thread_stat)); 160 if (!st) { 161 pr_err("memory allocation failed\n"); 162 return NULL; 163 } 164 st->tid = tid; 165 INIT_LIST_HEAD(&st->seq_list); 166 167 rb_link_node(&st->rb, NULL, &thread_stats.rb_node); 168 rb_insert_color(&st->rb, &thread_stats); 169 170 thread_stat_findnew = thread_stat_findnew_after_first; 171 return st; 172 } 173 174 /* build simple key function one is bigger than two */ 175 #define SINGLE_KEY(member) \ 176 static int lock_stat_key_ ## member(struct lock_stat *one, \ 177 struct lock_stat *two) \ 178 { \ 179 return one->member > two->member; \ 180 } 181 182 SINGLE_KEY(nr_acquired) 183 SINGLE_KEY(nr_contended) 184 SINGLE_KEY(avg_wait_time) 185 SINGLE_KEY(wait_time_total) 186 SINGLE_KEY(wait_time_max) 187 188 static int lock_stat_key_wait_time_min(struct lock_stat *one, 189 struct lock_stat *two) 190 { 191 u64 s1 = one->wait_time_min; 192 u64 s2 = two->wait_time_min; 193 if (s1 == ULLONG_MAX) 194 s1 = 0; 195 if (s2 == ULLONG_MAX) 196 s2 = 0; 197 return s1 > s2; 198 } 199 200 struct lock_key { 201 /* 202 * name: the value for specify by user 203 * this should be simpler than raw name of member 204 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total 205 */ 206 const char *name; 207 /* header: the string printed on the header line */ 208 const char *header; 209 /* len: the printing width of the field */ 210 int len; 211 /* key: a pointer to function to compare two lock stats for sorting */ 212 int (*key)(struct lock_stat*, struct lock_stat*); 213 /* print: a pointer to function to print a given lock stats */ 214 void (*print)(struct lock_key*, struct lock_stat*); 215 /* list: list entry to link this */ 216 struct list_head list; 217 }; 218 219 static void lock_stat_key_print_time(unsigned long long nsec, int len) 220 { 221 static const struct { 222 float base; 223 const char *unit; 224 } table[] = { 225 { 1e9 * 3600, "h " }, 226 { 1e9 * 60, "m " }, 227 { 1e9, "s " }, 228 { 1e6, "ms" }, 229 { 1e3, "us" }, 230 { 0, NULL }, 231 }; 232 233 /* for CSV output */ 234 if (len == 0) { 235 fprintf(lock_output, "%llu", nsec); 236 return; 237 } 238 239 for (int i = 0; table[i].unit; i++) { 240 if (nsec < table[i].base) 241 continue; 242 243 fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); 244 return; 245 } 246 247 fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns"); 248 } 249 250 #define PRINT_KEY(member) \ 251 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 252 struct lock_stat *ls) \ 253 { \ 254 fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\ 255 } 256 257 #define PRINT_TIME(member) \ 258 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 259 struct lock_stat *ls) \ 260 { \ 261 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ 262 } 263 264 PRINT_KEY(nr_acquired) 265 PRINT_KEY(nr_contended) 266 PRINT_TIME(avg_wait_time) 267 PRINT_TIME(wait_time_total) 268 PRINT_TIME(wait_time_max) 269 270 static void lock_stat_key_print_wait_time_min(struct lock_key *key, 271 struct lock_stat *ls) 272 { 273 u64 wait_time = ls->wait_time_min; 274 275 if (wait_time == ULLONG_MAX) 276 wait_time = 0; 277 278 lock_stat_key_print_time(wait_time, key->len); 279 } 280 281 282 static const char *sort_key = "acquired"; 283 284 static int (*compare)(struct lock_stat *, struct lock_stat *); 285 286 static struct rb_root sorted; /* place to store intermediate data */ 287 static struct rb_root result; /* place to store sorted data */ 288 289 static LIST_HEAD(lock_keys); 290 static const char *output_fields; 291 292 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \ 293 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} } 294 static struct lock_key report_keys[] = { 295 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), 296 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 297 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 298 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 299 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 300 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 301 302 /* extra comparisons much complicated should be here */ 303 { } 304 }; 305 306 static struct lock_key contention_keys[] = { 307 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 308 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 309 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 310 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 311 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 312 313 /* extra comparisons much complicated should be here */ 314 { } 315 }; 316 317 static int select_key(bool contention) 318 { 319 int i; 320 struct lock_key *keys = report_keys; 321 322 if (contention) 323 keys = contention_keys; 324 325 for (i = 0; keys[i].name; i++) { 326 if (!strcmp(keys[i].name, sort_key)) { 327 compare = keys[i].key; 328 329 /* selected key should be in the output fields */ 330 if (list_empty(&keys[i].list)) 331 list_add_tail(&keys[i].list, &lock_keys); 332 333 return 0; 334 } 335 } 336 337 pr_err("Unknown compare key: %s\n", sort_key); 338 return -1; 339 } 340 341 static int add_output_field(bool contention, char *name) 342 { 343 int i; 344 struct lock_key *keys = report_keys; 345 346 if (contention) 347 keys = contention_keys; 348 349 for (i = 0; keys[i].name; i++) { 350 if (strcmp(keys[i].name, name)) 351 continue; 352 353 /* prevent double link */ 354 if (list_empty(&keys[i].list)) 355 list_add_tail(&keys[i].list, &lock_keys); 356 357 return 0; 358 } 359 360 pr_err("Unknown output field: %s\n", name); 361 return -1; 362 } 363 364 static int setup_output_field(bool contention, const char *str) 365 { 366 char *tok, *tmp, *orig; 367 int i, ret = 0; 368 struct lock_key *keys = report_keys; 369 370 if (contention) 371 keys = contention_keys; 372 373 /* no output field given: use all of them */ 374 if (str == NULL) { 375 for (i = 0; keys[i].name; i++) 376 list_add_tail(&keys[i].list, &lock_keys); 377 return 0; 378 } 379 380 for (i = 0; keys[i].name; i++) 381 INIT_LIST_HEAD(&keys[i].list); 382 383 orig = tmp = strdup(str); 384 if (orig == NULL) 385 return -ENOMEM; 386 387 while ((tok = strsep(&tmp, ",")) != NULL){ 388 ret = add_output_field(contention, tok); 389 if (ret < 0) 390 break; 391 } 392 free(orig); 393 394 return ret; 395 } 396 397 static void combine_lock_stats(struct lock_stat *st) 398 { 399 struct rb_node **rb = &sorted.rb_node; 400 struct rb_node *parent = NULL; 401 struct lock_stat *p; 402 int ret; 403 404 while (*rb) { 405 p = container_of(*rb, struct lock_stat, rb); 406 parent = *rb; 407 408 if (st->name && p->name) 409 ret = strcmp(st->name, p->name); 410 else 411 ret = !!st->name - !!p->name; 412 413 if (ret == 0) { 414 p->nr_acquired += st->nr_acquired; 415 p->nr_contended += st->nr_contended; 416 p->wait_time_total += st->wait_time_total; 417 418 if (p->nr_contended) 419 p->avg_wait_time = p->wait_time_total / p->nr_contended; 420 421 if (p->wait_time_min > st->wait_time_min) 422 p->wait_time_min = st->wait_time_min; 423 if (p->wait_time_max < st->wait_time_max) 424 p->wait_time_max = st->wait_time_max; 425 426 p->broken |= st->broken; 427 st->combined = 1; 428 return; 429 } 430 431 if (ret < 0) 432 rb = &(*rb)->rb_left; 433 else 434 rb = &(*rb)->rb_right; 435 } 436 437 rb_link_node(&st->rb, parent, rb); 438 rb_insert_color(&st->rb, &sorted); 439 } 440 441 static void insert_to_result(struct lock_stat *st, 442 int (*bigger)(struct lock_stat *, struct lock_stat *)) 443 { 444 struct rb_node **rb = &result.rb_node; 445 struct rb_node *parent = NULL; 446 struct lock_stat *p; 447 448 if (combine_locks && st->combined) 449 return; 450 451 while (*rb) { 452 p = container_of(*rb, struct lock_stat, rb); 453 parent = *rb; 454 455 if (bigger(st, p)) 456 rb = &(*rb)->rb_left; 457 else 458 rb = &(*rb)->rb_right; 459 } 460 461 rb_link_node(&st->rb, parent, rb); 462 rb_insert_color(&st->rb, &result); 463 } 464 465 /* returns left most element of result, and erase it */ 466 static struct lock_stat *pop_from_result(void) 467 { 468 struct rb_node *node = result.rb_node; 469 470 if (!node) 471 return NULL; 472 473 while (node->rb_left) 474 node = node->rb_left; 475 476 rb_erase(node, &result); 477 return container_of(node, struct lock_stat, rb); 478 } 479 480 struct lock_stat *lock_stat_find(u64 addr) 481 { 482 struct hlist_head *entry = lockhashentry(addr); 483 struct lock_stat *ret; 484 485 hlist_for_each_entry(ret, entry, hash_entry) { 486 if (ret->addr == addr) 487 return ret; 488 } 489 return NULL; 490 } 491 492 struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags) 493 { 494 struct hlist_head *entry = lockhashentry(addr); 495 struct lock_stat *ret, *new; 496 497 hlist_for_each_entry(ret, entry, hash_entry) { 498 if (ret->addr == addr) 499 return ret; 500 } 501 502 new = zalloc(sizeof(struct lock_stat)); 503 if (!new) 504 goto alloc_failed; 505 506 new->addr = addr; 507 new->name = strdup(name); 508 if (!new->name) { 509 free(new); 510 goto alloc_failed; 511 } 512 513 new->flags = flags; 514 new->wait_time_min = ULLONG_MAX; 515 516 hlist_add_head(&new->hash_entry, entry); 517 return new; 518 519 alloc_failed: 520 pr_err("memory allocation failed\n"); 521 return NULL; 522 } 523 524 bool match_callstack_filter(struct machine *machine, u64 *callstack) 525 { 526 struct map *kmap; 527 struct symbol *sym; 528 u64 ip; 529 530 if (list_empty(&callstack_filters)) 531 return true; 532 533 for (int i = 0; i < max_stack_depth; i++) { 534 struct callstack_filter *filter; 535 536 if (!callstack || !callstack[i]) 537 break; 538 539 ip = callstack[i]; 540 sym = machine__find_kernel_symbol(machine, ip, &kmap); 541 if (sym == NULL) 542 continue; 543 544 list_for_each_entry(filter, &callstack_filters, list) { 545 if (strstr(sym->name, filter->name)) 546 return true; 547 } 548 } 549 return false; 550 } 551 552 struct trace_lock_handler { 553 /* it's used on CONFIG_LOCKDEP */ 554 int (*acquire_event)(struct evsel *evsel, 555 struct perf_sample *sample); 556 557 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 558 int (*acquired_event)(struct evsel *evsel, 559 struct perf_sample *sample); 560 561 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 562 int (*contended_event)(struct evsel *evsel, 563 struct perf_sample *sample); 564 565 /* it's used on CONFIG_LOCKDEP */ 566 int (*release_event)(struct evsel *evsel, 567 struct perf_sample *sample); 568 569 /* it's used when CONFIG_LOCKDEP is off */ 570 int (*contention_begin_event)(struct evsel *evsel, 571 struct perf_sample *sample); 572 573 /* it's used when CONFIG_LOCKDEP is off */ 574 int (*contention_end_event)(struct evsel *evsel, 575 struct perf_sample *sample); 576 }; 577 578 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) 579 { 580 struct lock_seq_stat *seq; 581 582 list_for_each_entry(seq, &ts->seq_list, list) { 583 if (seq->addr == addr) 584 return seq; 585 } 586 587 seq = zalloc(sizeof(struct lock_seq_stat)); 588 if (!seq) { 589 pr_err("memory allocation failed\n"); 590 return NULL; 591 } 592 seq->state = SEQ_STATE_UNINITIALIZED; 593 seq->addr = addr; 594 595 list_add(&seq->list, &ts->seq_list); 596 return seq; 597 } 598 599 enum broken_state { 600 BROKEN_ACQUIRE, 601 BROKEN_ACQUIRED, 602 BROKEN_CONTENDED, 603 BROKEN_RELEASE, 604 BROKEN_MAX, 605 }; 606 607 static int bad_hist[BROKEN_MAX]; 608 609 enum acquire_flags { 610 TRY_LOCK = 1, 611 READ_LOCK = 2, 612 }; 613 614 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid) 615 { 616 switch (aggr_mode) { 617 case LOCK_AGGR_ADDR: 618 *key = addr; 619 break; 620 case LOCK_AGGR_TASK: 621 *key = tid; 622 break; 623 case LOCK_AGGR_CALLER: 624 case LOCK_AGGR_CGROUP: 625 default: 626 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 627 return -EINVAL; 628 } 629 return 0; 630 } 631 632 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample); 633 634 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel, 635 struct perf_sample *sample) 636 { 637 if (aggr_mode == LOCK_AGGR_CALLER) { 638 *key = callchain_id(evsel, sample); 639 return 0; 640 } 641 return get_key_by_aggr_mode_simple(key, addr, sample->tid); 642 } 643 644 static int report_lock_acquire_event(struct evsel *evsel, 645 struct perf_sample *sample) 646 { 647 struct lock_stat *ls; 648 struct thread_stat *ts; 649 struct lock_seq_stat *seq; 650 const char *name = evsel__strval(evsel, sample, "name"); 651 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 652 int flag = evsel__intval(evsel, sample, "flags"); 653 u64 key; 654 int ret; 655 656 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 657 if (ret < 0) 658 return ret; 659 660 ls = lock_stat_findnew(key, name, 0); 661 if (!ls) 662 return -ENOMEM; 663 664 ts = thread_stat_findnew(sample->tid); 665 if (!ts) 666 return -ENOMEM; 667 668 seq = get_seq(ts, addr); 669 if (!seq) 670 return -ENOMEM; 671 672 switch (seq->state) { 673 case SEQ_STATE_UNINITIALIZED: 674 case SEQ_STATE_RELEASED: 675 if (!flag) { 676 seq->state = SEQ_STATE_ACQUIRING; 677 } else { 678 if (flag & TRY_LOCK) 679 ls->nr_trylock++; 680 if (flag & READ_LOCK) 681 ls->nr_readlock++; 682 seq->state = SEQ_STATE_READ_ACQUIRED; 683 seq->read_count = 1; 684 ls->nr_acquired++; 685 } 686 break; 687 case SEQ_STATE_READ_ACQUIRED: 688 if (flag & READ_LOCK) { 689 seq->read_count++; 690 ls->nr_acquired++; 691 goto end; 692 } else { 693 goto broken; 694 } 695 break; 696 case SEQ_STATE_ACQUIRED: 697 case SEQ_STATE_ACQUIRING: 698 case SEQ_STATE_CONTENDED: 699 broken: 700 /* broken lock sequence */ 701 if (!ls->broken) { 702 ls->broken = 1; 703 bad_hist[BROKEN_ACQUIRE]++; 704 } 705 list_del_init(&seq->list); 706 free(seq); 707 goto end; 708 default: 709 BUG_ON("Unknown state of lock sequence found!\n"); 710 break; 711 } 712 713 ls->nr_acquire++; 714 seq->prev_event_time = sample->time; 715 end: 716 return 0; 717 } 718 719 static int report_lock_acquired_event(struct evsel *evsel, 720 struct perf_sample *sample) 721 { 722 struct lock_stat *ls; 723 struct thread_stat *ts; 724 struct lock_seq_stat *seq; 725 u64 contended_term; 726 const char *name = evsel__strval(evsel, sample, "name"); 727 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 728 u64 key; 729 int ret; 730 731 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 732 if (ret < 0) 733 return ret; 734 735 ls = lock_stat_findnew(key, name, 0); 736 if (!ls) 737 return -ENOMEM; 738 739 ts = thread_stat_findnew(sample->tid); 740 if (!ts) 741 return -ENOMEM; 742 743 seq = get_seq(ts, addr); 744 if (!seq) 745 return -ENOMEM; 746 747 switch (seq->state) { 748 case SEQ_STATE_UNINITIALIZED: 749 /* orphan event, do nothing */ 750 return 0; 751 case SEQ_STATE_ACQUIRING: 752 break; 753 case SEQ_STATE_CONTENDED: 754 contended_term = sample->time - seq->prev_event_time; 755 ls->wait_time_total += contended_term; 756 if (contended_term < ls->wait_time_min) 757 ls->wait_time_min = contended_term; 758 if (ls->wait_time_max < contended_term) 759 ls->wait_time_max = contended_term; 760 break; 761 case SEQ_STATE_RELEASED: 762 case SEQ_STATE_ACQUIRED: 763 case SEQ_STATE_READ_ACQUIRED: 764 /* broken lock sequence */ 765 if (!ls->broken) { 766 ls->broken = 1; 767 bad_hist[BROKEN_ACQUIRED]++; 768 } 769 list_del_init(&seq->list); 770 free(seq); 771 goto end; 772 default: 773 BUG_ON("Unknown state of lock sequence found!\n"); 774 break; 775 } 776 777 seq->state = SEQ_STATE_ACQUIRED; 778 ls->nr_acquired++; 779 ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0; 780 seq->prev_event_time = sample->time; 781 end: 782 return 0; 783 } 784 785 static int report_lock_contended_event(struct evsel *evsel, 786 struct perf_sample *sample) 787 { 788 struct lock_stat *ls; 789 struct thread_stat *ts; 790 struct lock_seq_stat *seq; 791 const char *name = evsel__strval(evsel, sample, "name"); 792 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 793 u64 key; 794 int ret; 795 796 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 797 if (ret < 0) 798 return ret; 799 800 ls = lock_stat_findnew(key, name, 0); 801 if (!ls) 802 return -ENOMEM; 803 804 ts = thread_stat_findnew(sample->tid); 805 if (!ts) 806 return -ENOMEM; 807 808 seq = get_seq(ts, addr); 809 if (!seq) 810 return -ENOMEM; 811 812 switch (seq->state) { 813 case SEQ_STATE_UNINITIALIZED: 814 /* orphan event, do nothing */ 815 return 0; 816 case SEQ_STATE_ACQUIRING: 817 break; 818 case SEQ_STATE_RELEASED: 819 case SEQ_STATE_ACQUIRED: 820 case SEQ_STATE_READ_ACQUIRED: 821 case SEQ_STATE_CONTENDED: 822 /* broken lock sequence */ 823 if (!ls->broken) { 824 ls->broken = 1; 825 bad_hist[BROKEN_CONTENDED]++; 826 } 827 list_del_init(&seq->list); 828 free(seq); 829 goto end; 830 default: 831 BUG_ON("Unknown state of lock sequence found!\n"); 832 break; 833 } 834 835 seq->state = SEQ_STATE_CONTENDED; 836 ls->nr_contended++; 837 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; 838 seq->prev_event_time = sample->time; 839 end: 840 return 0; 841 } 842 843 static int report_lock_release_event(struct evsel *evsel, 844 struct perf_sample *sample) 845 { 846 struct lock_stat *ls; 847 struct thread_stat *ts; 848 struct lock_seq_stat *seq; 849 const char *name = evsel__strval(evsel, sample, "name"); 850 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 851 u64 key; 852 int ret; 853 854 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 855 if (ret < 0) 856 return ret; 857 858 ls = lock_stat_findnew(key, name, 0); 859 if (!ls) 860 return -ENOMEM; 861 862 ts = thread_stat_findnew(sample->tid); 863 if (!ts) 864 return -ENOMEM; 865 866 seq = get_seq(ts, addr); 867 if (!seq) 868 return -ENOMEM; 869 870 switch (seq->state) { 871 case SEQ_STATE_UNINITIALIZED: 872 goto end; 873 case SEQ_STATE_ACQUIRED: 874 break; 875 case SEQ_STATE_READ_ACQUIRED: 876 seq->read_count--; 877 BUG_ON(seq->read_count < 0); 878 if (seq->read_count) { 879 ls->nr_release++; 880 goto end; 881 } 882 break; 883 case SEQ_STATE_ACQUIRING: 884 case SEQ_STATE_CONTENDED: 885 case SEQ_STATE_RELEASED: 886 /* broken lock sequence */ 887 if (!ls->broken) { 888 ls->broken = 1; 889 bad_hist[BROKEN_RELEASE]++; 890 } 891 goto free_seq; 892 default: 893 BUG_ON("Unknown state of lock sequence found!\n"); 894 break; 895 } 896 897 ls->nr_release++; 898 free_seq: 899 list_del_init(&seq->list); 900 free(seq); 901 end: 902 return 0; 903 } 904 905 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, 906 char *buf, int size) 907 { 908 u64 offset; 909 910 if (map == NULL || sym == NULL) { 911 buf[0] = '\0'; 912 return 0; 913 } 914 915 offset = map__map_ip(map, ip) - sym->start; 916 917 if (offset) 918 return scnprintf(buf, size, "%s+%#lx", sym->name, offset); 919 else 920 return strlcpy(buf, sym->name, size); 921 } 922 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, 923 char *buf, int size) 924 { 925 struct thread *thread; 926 struct callchain_cursor *cursor; 927 struct machine *machine = &session->machines.host; 928 struct symbol *sym; 929 int skip = 0; 930 int ret; 931 932 /* lock names will be replaced to task name later */ 933 if (show_thread_stats) 934 return -1; 935 936 thread = machine__findnew_thread(machine, -1, sample->pid); 937 if (thread == NULL) 938 return -1; 939 940 cursor = get_tls_callchain_cursor(); 941 942 /* use caller function name from the callchain */ 943 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 944 NULL, NULL, max_stack_depth); 945 if (ret != 0) { 946 thread__put(thread); 947 return -1; 948 } 949 950 callchain_cursor_commit(cursor); 951 thread__put(thread); 952 953 while (true) { 954 struct callchain_cursor_node *node; 955 956 node = callchain_cursor_current(cursor); 957 if (node == NULL) 958 break; 959 960 /* skip first few entries - for lock functions */ 961 if (++skip <= stack_skip) 962 goto next; 963 964 sym = node->ms.sym; 965 if (sym && !machine__is_lock_function(machine, node->ip)) { 966 get_symbol_name_offset(node->ms.map, sym, node->ip, 967 buf, size); 968 return 0; 969 } 970 971 next: 972 callchain_cursor_advance(cursor); 973 } 974 return -1; 975 } 976 977 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) 978 { 979 struct callchain_cursor *cursor; 980 struct machine *machine = &session->machines.host; 981 struct thread *thread; 982 u64 hash = 0; 983 int skip = 0; 984 int ret; 985 986 thread = machine__findnew_thread(machine, -1, sample->pid); 987 if (thread == NULL) 988 return -1; 989 990 cursor = get_tls_callchain_cursor(); 991 /* use caller function name from the callchain */ 992 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 993 NULL, NULL, max_stack_depth); 994 thread__put(thread); 995 996 if (ret != 0) 997 return -1; 998 999 callchain_cursor_commit(cursor); 1000 1001 while (true) { 1002 struct callchain_cursor_node *node; 1003 1004 node = callchain_cursor_current(cursor); 1005 if (node == NULL) 1006 break; 1007 1008 /* skip first few entries - for lock functions */ 1009 if (++skip <= stack_skip) 1010 goto next; 1011 1012 if (node->ms.sym && machine__is_lock_function(machine, node->ip)) 1013 goto next; 1014 1015 hash ^= hash_long((unsigned long)node->ip, 64); 1016 1017 next: 1018 callchain_cursor_advance(cursor); 1019 } 1020 return hash; 1021 } 1022 1023 static u64 *get_callstack(struct perf_sample *sample, int max_stack) 1024 { 1025 u64 *callstack; 1026 u64 i; 1027 int c; 1028 1029 callstack = calloc(max_stack, sizeof(*callstack)); 1030 if (callstack == NULL) 1031 return NULL; 1032 1033 for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) { 1034 u64 ip = sample->callchain->ips[i]; 1035 1036 if (ip >= PERF_CONTEXT_MAX) 1037 continue; 1038 1039 callstack[c++] = ip; 1040 } 1041 return callstack; 1042 } 1043 1044 static int report_lock_contention_begin_event(struct evsel *evsel, 1045 struct perf_sample *sample) 1046 { 1047 struct lock_stat *ls; 1048 struct thread_stat *ts; 1049 struct lock_seq_stat *seq; 1050 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1051 unsigned int flags = evsel__intval(evsel, sample, "flags"); 1052 u64 key; 1053 int i, ret; 1054 static bool kmap_loaded; 1055 struct machine *machine = &session->machines.host; 1056 struct map *kmap; 1057 struct symbol *sym; 1058 1059 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 1060 if (ret < 0) 1061 return ret; 1062 1063 if (!kmap_loaded) { 1064 unsigned long *addrs; 1065 1066 /* make sure it loads the kernel map to find lock symbols */ 1067 map__load(machine__kernel_map(machine)); 1068 kmap_loaded = true; 1069 1070 /* convert (kernel) symbols to addresses */ 1071 for (i = 0; i < filters.nr_syms; i++) { 1072 sym = machine__find_kernel_symbol_by_name(machine, 1073 filters.syms[i], 1074 &kmap); 1075 if (sym == NULL) { 1076 pr_warning("ignore unknown symbol: %s\n", 1077 filters.syms[i]); 1078 continue; 1079 } 1080 1081 addrs = realloc(filters.addrs, 1082 (filters.nr_addrs + 1) * sizeof(*addrs)); 1083 if (addrs == NULL) { 1084 pr_warning("memory allocation failure\n"); 1085 return -ENOMEM; 1086 } 1087 1088 addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start); 1089 filters.addrs = addrs; 1090 } 1091 } 1092 1093 ls = lock_stat_find(key); 1094 if (!ls) { 1095 char buf[128]; 1096 const char *name = ""; 1097 1098 switch (aggr_mode) { 1099 case LOCK_AGGR_ADDR: 1100 sym = machine__find_kernel_symbol(machine, key, &kmap); 1101 if (sym) 1102 name = sym->name; 1103 break; 1104 case LOCK_AGGR_CALLER: 1105 name = buf; 1106 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1107 name = "Unknown"; 1108 break; 1109 case LOCK_AGGR_CGROUP: 1110 case LOCK_AGGR_TASK: 1111 default: 1112 break; 1113 } 1114 1115 ls = lock_stat_findnew(key, name, flags); 1116 if (!ls) 1117 return -ENOMEM; 1118 } 1119 1120 if (filters.nr_types) { 1121 bool found = false; 1122 1123 for (i = 0; i < filters.nr_types; i++) { 1124 if (flags == filters.types[i]) { 1125 found = true; 1126 break; 1127 } 1128 } 1129 1130 if (!found) 1131 return 0; 1132 } 1133 1134 if (filters.nr_addrs) { 1135 bool found = false; 1136 1137 for (i = 0; i < filters.nr_addrs; i++) { 1138 if (addr == filters.addrs[i]) { 1139 found = true; 1140 break; 1141 } 1142 } 1143 1144 if (!found) 1145 return 0; 1146 } 1147 1148 if (needs_callstack()) { 1149 u64 *callstack = get_callstack(sample, max_stack_depth); 1150 if (callstack == NULL) 1151 return -ENOMEM; 1152 1153 if (!match_callstack_filter(machine, callstack)) { 1154 free(callstack); 1155 return 0; 1156 } 1157 1158 if (ls->callstack == NULL) 1159 ls->callstack = callstack; 1160 else 1161 free(callstack); 1162 } 1163 1164 ts = thread_stat_findnew(sample->tid); 1165 if (!ts) 1166 return -ENOMEM; 1167 1168 seq = get_seq(ts, addr); 1169 if (!seq) 1170 return -ENOMEM; 1171 1172 switch (seq->state) { 1173 case SEQ_STATE_UNINITIALIZED: 1174 case SEQ_STATE_ACQUIRED: 1175 break; 1176 case SEQ_STATE_CONTENDED: 1177 /* 1178 * It can have nested contention begin with mutex spinning, 1179 * then we would use the original contention begin event and 1180 * ignore the second one. 1181 */ 1182 goto end; 1183 case SEQ_STATE_ACQUIRING: 1184 case SEQ_STATE_READ_ACQUIRED: 1185 case SEQ_STATE_RELEASED: 1186 /* broken lock sequence */ 1187 if (!ls->broken) { 1188 ls->broken = 1; 1189 bad_hist[BROKEN_CONTENDED]++; 1190 } 1191 list_del_init(&seq->list); 1192 free(seq); 1193 goto end; 1194 default: 1195 BUG_ON("Unknown state of lock sequence found!\n"); 1196 break; 1197 } 1198 1199 if (seq->state != SEQ_STATE_CONTENDED) { 1200 seq->state = SEQ_STATE_CONTENDED; 1201 seq->prev_event_time = sample->time; 1202 ls->nr_contended++; 1203 } 1204 end: 1205 return 0; 1206 } 1207 1208 static int report_lock_contention_end_event(struct evsel *evsel, 1209 struct perf_sample *sample) 1210 { 1211 struct lock_stat *ls; 1212 struct thread_stat *ts; 1213 struct lock_seq_stat *seq; 1214 u64 contended_term; 1215 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1216 u64 key; 1217 int ret; 1218 1219 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 1220 if (ret < 0) 1221 return ret; 1222 1223 ls = lock_stat_find(key); 1224 if (!ls) 1225 return 0; 1226 1227 ts = thread_stat_find(sample->tid); 1228 if (!ts) 1229 return 0; 1230 1231 seq = get_seq(ts, addr); 1232 if (!seq) 1233 return -ENOMEM; 1234 1235 switch (seq->state) { 1236 case SEQ_STATE_UNINITIALIZED: 1237 goto end; 1238 case SEQ_STATE_CONTENDED: 1239 contended_term = sample->time - seq->prev_event_time; 1240 ls->wait_time_total += contended_term; 1241 if (contended_term < ls->wait_time_min) 1242 ls->wait_time_min = contended_term; 1243 if (ls->wait_time_max < contended_term) 1244 ls->wait_time_max = contended_term; 1245 break; 1246 case SEQ_STATE_ACQUIRING: 1247 case SEQ_STATE_ACQUIRED: 1248 case SEQ_STATE_READ_ACQUIRED: 1249 case SEQ_STATE_RELEASED: 1250 /* broken lock sequence */ 1251 if (!ls->broken) { 1252 ls->broken = 1; 1253 bad_hist[BROKEN_ACQUIRED]++; 1254 } 1255 list_del_init(&seq->list); 1256 free(seq); 1257 goto end; 1258 default: 1259 BUG_ON("Unknown state of lock sequence found!\n"); 1260 break; 1261 } 1262 1263 seq->state = SEQ_STATE_ACQUIRED; 1264 ls->nr_acquired++; 1265 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; 1266 end: 1267 return 0; 1268 } 1269 1270 /* lock oriented handlers */ 1271 /* TODO: handlers for CPU oriented, thread oriented */ 1272 static struct trace_lock_handler report_lock_ops = { 1273 .acquire_event = report_lock_acquire_event, 1274 .acquired_event = report_lock_acquired_event, 1275 .contended_event = report_lock_contended_event, 1276 .release_event = report_lock_release_event, 1277 .contention_begin_event = report_lock_contention_begin_event, 1278 .contention_end_event = report_lock_contention_end_event, 1279 }; 1280 1281 static struct trace_lock_handler contention_lock_ops = { 1282 .contention_begin_event = report_lock_contention_begin_event, 1283 .contention_end_event = report_lock_contention_end_event, 1284 }; 1285 1286 1287 static struct trace_lock_handler *trace_handler; 1288 1289 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) 1290 { 1291 if (trace_handler->acquire_event) 1292 return trace_handler->acquire_event(evsel, sample); 1293 return 0; 1294 } 1295 1296 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) 1297 { 1298 if (trace_handler->acquired_event) 1299 return trace_handler->acquired_event(evsel, sample); 1300 return 0; 1301 } 1302 1303 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) 1304 { 1305 if (trace_handler->contended_event) 1306 return trace_handler->contended_event(evsel, sample); 1307 return 0; 1308 } 1309 1310 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) 1311 { 1312 if (trace_handler->release_event) 1313 return trace_handler->release_event(evsel, sample); 1314 return 0; 1315 } 1316 1317 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) 1318 { 1319 if (trace_handler->contention_begin_event) 1320 return trace_handler->contention_begin_event(evsel, sample); 1321 return 0; 1322 } 1323 1324 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) 1325 { 1326 if (trace_handler->contention_end_event) 1327 return trace_handler->contention_end_event(evsel, sample); 1328 return 0; 1329 } 1330 1331 static void print_bad_events(int bad, int total) 1332 { 1333 /* Output for debug, this have to be removed */ 1334 int i; 1335 int broken = 0; 1336 const char *name[4] = 1337 { "acquire", "acquired", "contended", "release" }; 1338 1339 for (i = 0; i < BROKEN_MAX; i++) 1340 broken += bad_hist[i]; 1341 1342 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1343 return; 1344 1345 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1346 fprintf(lock_output, "bad: %d, total: %d\n", bad, total); 1347 fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100); 1348 fprintf(lock_output, "histogram of events caused bad sequence\n"); 1349 for (i = 0; i < BROKEN_MAX; i++) 1350 fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]); 1351 } 1352 1353 /* TODO: various way to print, coloring, nano or milli sec */ 1354 static void print_result(void) 1355 { 1356 struct lock_stat *st; 1357 struct lock_key *key; 1358 char cut_name[20]; 1359 int bad, total, printed; 1360 1361 if (!quiet) { 1362 fprintf(lock_output, "%20s ", "Name"); 1363 list_for_each_entry(key, &lock_keys, list) 1364 fprintf(lock_output, "%*s ", key->len, key->header); 1365 fprintf(lock_output, "\n\n"); 1366 } 1367 1368 bad = total = printed = 0; 1369 while ((st = pop_from_result())) { 1370 total++; 1371 if (st->broken) 1372 bad++; 1373 if (!st->nr_acquired) 1374 continue; 1375 1376 bzero(cut_name, 20); 1377 1378 if (strlen(st->name) < 20) { 1379 /* output raw name */ 1380 const char *name = st->name; 1381 1382 if (show_thread_stats) { 1383 struct thread *t; 1384 1385 /* st->addr contains tid of thread */ 1386 t = perf_session__findnew(session, st->addr); 1387 name = thread__comm_str(t); 1388 } 1389 1390 fprintf(lock_output, "%20s ", name); 1391 } else { 1392 strncpy(cut_name, st->name, 16); 1393 cut_name[16] = '.'; 1394 cut_name[17] = '.'; 1395 cut_name[18] = '.'; 1396 cut_name[19] = '\0'; 1397 /* cut off name for saving output style */ 1398 fprintf(lock_output, "%20s ", cut_name); 1399 } 1400 1401 list_for_each_entry(key, &lock_keys, list) { 1402 key->print(key, st); 1403 fprintf(lock_output, " "); 1404 } 1405 fprintf(lock_output, "\n"); 1406 1407 if (++printed >= print_nr_entries) 1408 break; 1409 } 1410 1411 print_bad_events(bad, total); 1412 } 1413 1414 static bool info_threads, info_map; 1415 1416 static void dump_threads(void) 1417 { 1418 struct thread_stat *st; 1419 struct rb_node *node; 1420 struct thread *t; 1421 1422 fprintf(lock_output, "%10s: comm\n", "Thread ID"); 1423 1424 node = rb_first(&thread_stats); 1425 while (node) { 1426 st = container_of(node, struct thread_stat, rb); 1427 t = perf_session__findnew(session, st->tid); 1428 fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t)); 1429 node = rb_next(node); 1430 thread__put(t); 1431 } 1432 } 1433 1434 static int compare_maps(struct lock_stat *a, struct lock_stat *b) 1435 { 1436 int ret; 1437 1438 if (a->name && b->name) 1439 ret = strcmp(a->name, b->name); 1440 else 1441 ret = !!a->name - !!b->name; 1442 1443 if (!ret) 1444 return a->addr < b->addr; 1445 else 1446 return ret < 0; 1447 } 1448 1449 static void dump_map(void) 1450 { 1451 unsigned int i; 1452 struct lock_stat *st; 1453 1454 fprintf(lock_output, "Address of instance: name of class\n"); 1455 for (i = 0; i < LOCKHASH_SIZE; i++) { 1456 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1457 insert_to_result(st, compare_maps); 1458 } 1459 } 1460 1461 while ((st = pop_from_result())) 1462 fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name); 1463 } 1464 1465 static int dump_info(void) 1466 { 1467 int rc = 0; 1468 1469 if (info_threads) 1470 dump_threads(); 1471 else if (info_map) 1472 dump_map(); 1473 else { 1474 rc = -1; 1475 pr_err("Unknown type of information\n"); 1476 } 1477 1478 return rc; 1479 } 1480 1481 static const struct evsel_str_handler lock_tracepoints[] = { 1482 { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ 1483 { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1484 { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1485 { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ 1486 }; 1487 1488 static const struct evsel_str_handler contention_tracepoints[] = { 1489 { "lock:contention_begin", evsel__process_contention_begin, }, 1490 { "lock:contention_end", evsel__process_contention_end, }, 1491 }; 1492 1493 static int process_event_update(struct perf_tool *tool, 1494 union perf_event *event, 1495 struct evlist **pevlist) 1496 { 1497 int ret; 1498 1499 ret = perf_event__process_event_update(tool, event, pevlist); 1500 if (ret < 0) 1501 return ret; 1502 1503 /* this can return -EEXIST since we call it for each evsel */ 1504 perf_session__set_tracepoints_handlers(session, lock_tracepoints); 1505 perf_session__set_tracepoints_handlers(session, contention_tracepoints); 1506 return 0; 1507 } 1508 1509 typedef int (*tracepoint_handler)(struct evsel *evsel, 1510 struct perf_sample *sample); 1511 1512 static int process_sample_event(struct perf_tool *tool __maybe_unused, 1513 union perf_event *event, 1514 struct perf_sample *sample, 1515 struct evsel *evsel, 1516 struct machine *machine) 1517 { 1518 int err = 0; 1519 struct thread *thread = machine__findnew_thread(machine, sample->pid, 1520 sample->tid); 1521 1522 if (thread == NULL) { 1523 pr_debug("problem processing %d event, skipping it.\n", 1524 event->header.type); 1525 return -1; 1526 } 1527 1528 if (evsel->handler != NULL) { 1529 tracepoint_handler f = evsel->handler; 1530 err = f(evsel, sample); 1531 } 1532 1533 thread__put(thread); 1534 1535 return err; 1536 } 1537 1538 static void combine_result(void) 1539 { 1540 unsigned int i; 1541 struct lock_stat *st; 1542 1543 if (!combine_locks) 1544 return; 1545 1546 for (i = 0; i < LOCKHASH_SIZE; i++) { 1547 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1548 combine_lock_stats(st); 1549 } 1550 } 1551 } 1552 1553 static void sort_result(void) 1554 { 1555 unsigned int i; 1556 struct lock_stat *st; 1557 1558 for (i = 0; i < LOCKHASH_SIZE; i++) { 1559 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1560 insert_to_result(st, compare); 1561 } 1562 } 1563 } 1564 1565 static const struct { 1566 unsigned int flags; 1567 const char *str; 1568 const char *name; 1569 } lock_type_table[] = { 1570 { 0, "semaphore", "semaphore" }, 1571 { LCB_F_SPIN, "spinlock", "spinlock" }, 1572 { LCB_F_SPIN | LCB_F_READ, "rwlock:R", "rwlock" }, 1573 { LCB_F_SPIN | LCB_F_WRITE, "rwlock:W", "rwlock" }, 1574 { LCB_F_READ, "rwsem:R", "rwsem" }, 1575 { LCB_F_WRITE, "rwsem:W", "rwsem" }, 1576 { LCB_F_RT, "rt-mutex", "rt-mutex" }, 1577 { LCB_F_RT | LCB_F_READ, "rwlock-rt:R", "rwlock-rt" }, 1578 { LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" }, 1579 { LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" }, 1580 { LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" }, 1581 { LCB_F_MUTEX, "mutex", "mutex" }, 1582 { LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" }, 1583 /* alias for get_type_flag() */ 1584 { LCB_F_MUTEX | LCB_F_SPIN, "mutex-spin", "mutex" }, 1585 }; 1586 1587 static const char *get_type_str(unsigned int flags) 1588 { 1589 flags &= LCB_F_MAX_FLAGS - 1; 1590 1591 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1592 if (lock_type_table[i].flags == flags) 1593 return lock_type_table[i].str; 1594 } 1595 return "unknown"; 1596 } 1597 1598 static const char *get_type_name(unsigned int flags) 1599 { 1600 flags &= LCB_F_MAX_FLAGS - 1; 1601 1602 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1603 if (lock_type_table[i].flags == flags) 1604 return lock_type_table[i].name; 1605 } 1606 return "unknown"; 1607 } 1608 1609 static unsigned int get_type_flag(const char *str) 1610 { 1611 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1612 if (!strcmp(lock_type_table[i].name, str)) 1613 return lock_type_table[i].flags; 1614 } 1615 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1616 if (!strcmp(lock_type_table[i].str, str)) 1617 return lock_type_table[i].flags; 1618 } 1619 return UINT_MAX; 1620 } 1621 1622 static void lock_filter_finish(void) 1623 { 1624 zfree(&filters.types); 1625 filters.nr_types = 0; 1626 1627 zfree(&filters.addrs); 1628 filters.nr_addrs = 0; 1629 1630 for (int i = 0; i < filters.nr_syms; i++) 1631 free(filters.syms[i]); 1632 1633 zfree(&filters.syms); 1634 filters.nr_syms = 0; 1635 1636 zfree(&filters.cgrps); 1637 filters.nr_cgrps = 0; 1638 } 1639 1640 static void sort_contention_result(void) 1641 { 1642 sort_result(); 1643 } 1644 1645 static void print_header_stdio(void) 1646 { 1647 struct lock_key *key; 1648 1649 list_for_each_entry(key, &lock_keys, list) 1650 fprintf(lock_output, "%*s ", key->len, key->header); 1651 1652 switch (aggr_mode) { 1653 case LOCK_AGGR_TASK: 1654 fprintf(lock_output, " %10s %s\n\n", "pid", 1655 show_lock_owner ? "owner" : "comm"); 1656 break; 1657 case LOCK_AGGR_CALLER: 1658 fprintf(lock_output, " %10s %s\n\n", "type", "caller"); 1659 break; 1660 case LOCK_AGGR_ADDR: 1661 fprintf(lock_output, " %16s %s\n\n", "address", "symbol"); 1662 break; 1663 case LOCK_AGGR_CGROUP: 1664 fprintf(lock_output, " %s\n\n", "cgroup"); 1665 break; 1666 default: 1667 break; 1668 } 1669 } 1670 1671 static void print_header_csv(const char *sep) 1672 { 1673 struct lock_key *key; 1674 1675 fprintf(lock_output, "# output: "); 1676 list_for_each_entry(key, &lock_keys, list) 1677 fprintf(lock_output, "%s%s ", key->header, sep); 1678 1679 switch (aggr_mode) { 1680 case LOCK_AGGR_TASK: 1681 fprintf(lock_output, "%s%s %s\n", "pid", sep, 1682 show_lock_owner ? "owner" : "comm"); 1683 break; 1684 case LOCK_AGGR_CALLER: 1685 fprintf(lock_output, "%s%s %s", "type", sep, "caller"); 1686 if (verbose > 0) 1687 fprintf(lock_output, "%s %s", sep, "stacktrace"); 1688 fprintf(lock_output, "\n"); 1689 break; 1690 case LOCK_AGGR_ADDR: 1691 fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type"); 1692 break; 1693 case LOCK_AGGR_CGROUP: 1694 fprintf(lock_output, "%s\n", "cgroup"); 1695 break; 1696 default: 1697 break; 1698 } 1699 } 1700 1701 static void print_header(void) 1702 { 1703 if (!quiet) { 1704 if (symbol_conf.field_sep) 1705 print_header_csv(symbol_conf.field_sep); 1706 else 1707 print_header_stdio(); 1708 } 1709 } 1710 1711 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st) 1712 { 1713 struct lock_key *key; 1714 struct thread *t; 1715 int pid; 1716 1717 list_for_each_entry(key, &lock_keys, list) { 1718 key->print(key, st); 1719 fprintf(lock_output, " "); 1720 } 1721 1722 switch (aggr_mode) { 1723 case LOCK_AGGR_CALLER: 1724 fprintf(lock_output, " %10s %s\n", get_type_str(st->flags), st->name); 1725 break; 1726 case LOCK_AGGR_TASK: 1727 pid = st->addr; 1728 t = perf_session__findnew(session, pid); 1729 fprintf(lock_output, " %10d %s\n", 1730 pid, pid == -1 ? "Unknown" : thread__comm_str(t)); 1731 break; 1732 case LOCK_AGGR_ADDR: 1733 fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr, 1734 st->name, get_type_name(st->flags)); 1735 break; 1736 case LOCK_AGGR_CGROUP: 1737 fprintf(lock_output, " %s\n", st->name); 1738 break; 1739 default: 1740 break; 1741 } 1742 1743 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1744 struct map *kmap; 1745 struct symbol *sym; 1746 char buf[128]; 1747 u64 ip; 1748 1749 for (int i = 0; i < max_stack_depth; i++) { 1750 if (!st->callstack || !st->callstack[i]) 1751 break; 1752 1753 ip = st->callstack[i]; 1754 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1755 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1756 fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf); 1757 } 1758 } 1759 } 1760 1761 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st, 1762 const char *sep) 1763 { 1764 struct lock_key *key; 1765 struct thread *t; 1766 int pid; 1767 1768 list_for_each_entry(key, &lock_keys, list) { 1769 key->print(key, st); 1770 fprintf(lock_output, "%s ", sep); 1771 } 1772 1773 switch (aggr_mode) { 1774 case LOCK_AGGR_CALLER: 1775 fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name); 1776 if (verbose <= 0) 1777 fprintf(lock_output, "\n"); 1778 break; 1779 case LOCK_AGGR_TASK: 1780 pid = st->addr; 1781 t = perf_session__findnew(session, pid); 1782 fprintf(lock_output, "%d%s %s\n", pid, sep, 1783 pid == -1 ? "Unknown" : thread__comm_str(t)); 1784 break; 1785 case LOCK_AGGR_ADDR: 1786 fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep, 1787 st->name, sep, get_type_name(st->flags)); 1788 break; 1789 case LOCK_AGGR_CGROUP: 1790 fprintf(lock_output, "%s\n",st->name); 1791 break; 1792 default: 1793 break; 1794 } 1795 1796 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1797 struct map *kmap; 1798 struct symbol *sym; 1799 char buf[128]; 1800 u64 ip; 1801 1802 for (int i = 0; i < max_stack_depth; i++) { 1803 if (!st->callstack || !st->callstack[i]) 1804 break; 1805 1806 ip = st->callstack[i]; 1807 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1808 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1809 fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf); 1810 } 1811 fprintf(lock_output, "\n"); 1812 } 1813 } 1814 1815 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st) 1816 { 1817 if (symbol_conf.field_sep) 1818 print_lock_stat_csv(con, st, symbol_conf.field_sep); 1819 else 1820 print_lock_stat_stdio(con, st); 1821 } 1822 1823 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails) 1824 { 1825 /* Output for debug, this have to be removed */ 1826 int broken = fails->task + fails->stack + fails->time + fails->data; 1827 1828 if (!use_bpf) 1829 print_bad_events(bad, total); 1830 1831 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1832 return; 1833 1834 total += broken; 1835 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1836 fprintf(lock_output, "bad: %d, total: %d\n", broken, total); 1837 fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total); 1838 1839 fprintf(lock_output, "histogram of failure reasons\n"); 1840 fprintf(lock_output, " %10s: %d\n", "task", fails->task); 1841 fprintf(lock_output, " %10s: %d\n", "stack", fails->stack); 1842 fprintf(lock_output, " %10s: %d\n", "time", fails->time); 1843 fprintf(lock_output, " %10s: %d\n", "data", fails->data); 1844 } 1845 1846 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails, 1847 const char *sep) 1848 { 1849 /* Output for debug, this have to be removed */ 1850 if (use_bpf) 1851 bad = fails->task + fails->stack + fails->time + fails->data; 1852 1853 if (quiet || total == 0 || (bad == 0 && verbose <= 0)) 1854 return; 1855 1856 total += bad; 1857 fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad); 1858 1859 if (use_bpf) { 1860 fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task); 1861 fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack); 1862 fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time); 1863 fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data); 1864 } else { 1865 int i; 1866 const char *name[4] = { "acquire", "acquired", "contended", "release" }; 1867 1868 for (i = 0; i < BROKEN_MAX; i++) 1869 fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]); 1870 } 1871 fprintf(lock_output, "\n"); 1872 } 1873 1874 static void print_footer(int total, int bad, struct lock_contention_fails *fails) 1875 { 1876 if (symbol_conf.field_sep) 1877 print_footer_csv(total, bad, fails, symbol_conf.field_sep); 1878 else 1879 print_footer_stdio(total, bad, fails); 1880 } 1881 1882 static void print_contention_result(struct lock_contention *con) 1883 { 1884 struct lock_stat *st; 1885 int bad, total, printed; 1886 1887 if (!quiet) 1888 print_header(); 1889 1890 bad = total = printed = 0; 1891 1892 while ((st = pop_from_result())) { 1893 total += use_bpf ? st->nr_contended : 1; 1894 if (st->broken) 1895 bad++; 1896 1897 if (!st->wait_time_total) 1898 continue; 1899 1900 print_lock_stat(con, st); 1901 1902 if (++printed >= print_nr_entries) 1903 break; 1904 } 1905 1906 if (print_nr_entries) { 1907 /* update the total/bad stats */ 1908 while ((st = pop_from_result())) { 1909 total += use_bpf ? st->nr_contended : 1; 1910 if (st->broken) 1911 bad++; 1912 } 1913 } 1914 /* some entries are collected but hidden by the callstack filter */ 1915 total += con->nr_filtered; 1916 1917 print_footer(total, bad, &con->fails); 1918 } 1919 1920 static bool force; 1921 1922 static int __cmd_report(bool display_info) 1923 { 1924 int err = -EINVAL; 1925 struct perf_tool eops = { 1926 .attr = perf_event__process_attr, 1927 .event_update = process_event_update, 1928 .sample = process_sample_event, 1929 .comm = perf_event__process_comm, 1930 .mmap = perf_event__process_mmap, 1931 .namespaces = perf_event__process_namespaces, 1932 .tracing_data = perf_event__process_tracing_data, 1933 .ordered_events = true, 1934 }; 1935 struct perf_data data = { 1936 .path = input_name, 1937 .mode = PERF_DATA_MODE_READ, 1938 .force = force, 1939 }; 1940 1941 session = perf_session__new(&data, &eops); 1942 if (IS_ERR(session)) { 1943 pr_err("Initializing perf session failed\n"); 1944 return PTR_ERR(session); 1945 } 1946 1947 symbol_conf.allow_aliases = true; 1948 symbol__init(&session->header.env); 1949 1950 if (!data.is_pipe) { 1951 if (!perf_session__has_traces(session, "lock record")) 1952 goto out_delete; 1953 1954 if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { 1955 pr_err("Initializing perf session tracepoint handlers failed\n"); 1956 goto out_delete; 1957 } 1958 1959 if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { 1960 pr_err("Initializing perf session tracepoint handlers failed\n"); 1961 goto out_delete; 1962 } 1963 } 1964 1965 if (setup_output_field(false, output_fields)) 1966 goto out_delete; 1967 1968 if (select_key(false)) 1969 goto out_delete; 1970 1971 if (show_thread_stats) 1972 aggr_mode = LOCK_AGGR_TASK; 1973 1974 err = perf_session__process_events(session); 1975 if (err) 1976 goto out_delete; 1977 1978 setup_pager(); 1979 if (display_info) /* used for info subcommand */ 1980 err = dump_info(); 1981 else { 1982 combine_result(); 1983 sort_result(); 1984 print_result(); 1985 } 1986 1987 out_delete: 1988 perf_session__delete(session); 1989 return err; 1990 } 1991 1992 static void sighandler(int sig __maybe_unused) 1993 { 1994 } 1995 1996 static int check_lock_contention_options(const struct option *options, 1997 const char * const *usage) 1998 1999 { 2000 if (show_thread_stats && show_lock_addrs) { 2001 pr_err("Cannot use thread and addr mode together\n"); 2002 parse_options_usage(usage, options, "threads", 0); 2003 parse_options_usage(NULL, options, "lock-addr", 0); 2004 return -1; 2005 } 2006 2007 if (show_lock_owner && !use_bpf) { 2008 pr_err("Lock owners are available only with BPF\n"); 2009 parse_options_usage(usage, options, "lock-owner", 0); 2010 parse_options_usage(NULL, options, "use-bpf", 0); 2011 return -1; 2012 } 2013 2014 if (show_lock_owner && show_lock_addrs) { 2015 pr_err("Cannot use owner and addr mode together\n"); 2016 parse_options_usage(usage, options, "lock-owner", 0); 2017 parse_options_usage(NULL, options, "lock-addr", 0); 2018 return -1; 2019 } 2020 2021 if (show_lock_cgroups && !use_bpf) { 2022 pr_err("Cgroups are available only with BPF\n"); 2023 parse_options_usage(usage, options, "lock-cgroup", 0); 2024 parse_options_usage(NULL, options, "use-bpf", 0); 2025 return -1; 2026 } 2027 2028 if (show_lock_cgroups && show_lock_addrs) { 2029 pr_err("Cannot use cgroup and addr mode together\n"); 2030 parse_options_usage(usage, options, "lock-cgroup", 0); 2031 parse_options_usage(NULL, options, "lock-addr", 0); 2032 return -1; 2033 } 2034 2035 if (show_lock_cgroups && show_thread_stats) { 2036 pr_err("Cannot use cgroup and thread mode together\n"); 2037 parse_options_usage(usage, options, "lock-cgroup", 0); 2038 parse_options_usage(NULL, options, "threads", 0); 2039 return -1; 2040 } 2041 2042 if (symbol_conf.field_sep) { 2043 if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */ 2044 strstr(symbol_conf.field_sep, "+") || /* part of caller offset */ 2045 strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */ 2046 pr_err("Cannot use the separator that is already used\n"); 2047 parse_options_usage(usage, options, "x", 1); 2048 return -1; 2049 } 2050 } 2051 2052 if (show_lock_owner) 2053 show_thread_stats = true; 2054 2055 return 0; 2056 } 2057 2058 static int __cmd_contention(int argc, const char **argv) 2059 { 2060 int err = -EINVAL; 2061 struct perf_tool eops = { 2062 .attr = perf_event__process_attr, 2063 .event_update = process_event_update, 2064 .sample = process_sample_event, 2065 .comm = perf_event__process_comm, 2066 .mmap = perf_event__process_mmap, 2067 .tracing_data = perf_event__process_tracing_data, 2068 .ordered_events = true, 2069 }; 2070 struct perf_data data = { 2071 .path = input_name, 2072 .mode = PERF_DATA_MODE_READ, 2073 .force = force, 2074 }; 2075 struct lock_contention con = { 2076 .target = &target, 2077 .map_nr_entries = bpf_map_entries, 2078 .max_stack = max_stack_depth, 2079 .stack_skip = stack_skip, 2080 .filters = &filters, 2081 .save_callstack = needs_callstack(), 2082 .owner = show_lock_owner, 2083 .cgroups = RB_ROOT, 2084 }; 2085 2086 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table)); 2087 if (!lockhash_table) 2088 return -ENOMEM; 2089 2090 con.result = &lockhash_table[0]; 2091 2092 session = perf_session__new(use_bpf ? NULL : &data, &eops); 2093 if (IS_ERR(session)) { 2094 pr_err("Initializing perf session failed\n"); 2095 err = PTR_ERR(session); 2096 session = NULL; 2097 goto out_delete; 2098 } 2099 2100 con.machine = &session->machines.host; 2101 2102 con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : 2103 show_lock_addrs ? LOCK_AGGR_ADDR : 2104 show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER; 2105 2106 if (con.aggr_mode == LOCK_AGGR_CALLER) 2107 con.save_callstack = true; 2108 2109 symbol_conf.allow_aliases = true; 2110 symbol__init(&session->header.env); 2111 2112 if (use_bpf) { 2113 err = target__validate(&target); 2114 if (err) { 2115 char errbuf[512]; 2116 2117 target__strerror(&target, err, errbuf, 512); 2118 pr_err("%s\n", errbuf); 2119 goto out_delete; 2120 } 2121 2122 signal(SIGINT, sighandler); 2123 signal(SIGCHLD, sighandler); 2124 signal(SIGTERM, sighandler); 2125 2126 con.evlist = evlist__new(); 2127 if (con.evlist == NULL) { 2128 err = -ENOMEM; 2129 goto out_delete; 2130 } 2131 2132 err = evlist__create_maps(con.evlist, &target); 2133 if (err < 0) 2134 goto out_delete; 2135 2136 if (argc) { 2137 err = evlist__prepare_workload(con.evlist, &target, 2138 argv, false, NULL); 2139 if (err < 0) 2140 goto out_delete; 2141 } 2142 2143 if (lock_contention_prepare(&con) < 0) { 2144 pr_err("lock contention BPF setup failed\n"); 2145 goto out_delete; 2146 } 2147 } else if (!data.is_pipe) { 2148 if (!perf_session__has_traces(session, "lock record")) 2149 goto out_delete; 2150 2151 if (!evlist__find_evsel_by_str(session->evlist, 2152 "lock:contention_begin")) { 2153 pr_err("lock contention evsel not found\n"); 2154 goto out_delete; 2155 } 2156 2157 if (perf_session__set_tracepoints_handlers(session, 2158 contention_tracepoints)) { 2159 pr_err("Initializing perf session tracepoint handlers failed\n"); 2160 goto out_delete; 2161 } 2162 } 2163 2164 if (setup_output_field(true, output_fields)) 2165 goto out_delete; 2166 2167 if (select_key(true)) 2168 goto out_delete; 2169 2170 if (symbol_conf.field_sep) { 2171 int i; 2172 struct lock_key *keys = contention_keys; 2173 2174 /* do not align output in CSV format */ 2175 for (i = 0; keys[i].name; i++) 2176 keys[i].len = 0; 2177 } 2178 2179 if (use_bpf) { 2180 lock_contention_start(); 2181 if (argc) 2182 evlist__start_workload(con.evlist); 2183 2184 /* wait for signal */ 2185 pause(); 2186 2187 lock_contention_stop(); 2188 lock_contention_read(&con); 2189 } else { 2190 err = perf_session__process_events(session); 2191 if (err) 2192 goto out_delete; 2193 } 2194 2195 setup_pager(); 2196 2197 sort_contention_result(); 2198 print_contention_result(&con); 2199 2200 out_delete: 2201 lock_filter_finish(); 2202 evlist__delete(con.evlist); 2203 lock_contention_finish(&con); 2204 perf_session__delete(session); 2205 zfree(&lockhash_table); 2206 return err; 2207 } 2208 2209 2210 static int __cmd_record(int argc, const char **argv) 2211 { 2212 const char *record_args[] = { 2213 "record", "-R", "-m", "1024", "-c", "1", "--synth", "task", 2214 }; 2215 const char *callgraph_args[] = { 2216 "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH), 2217 }; 2218 unsigned int rec_argc, i, j, ret; 2219 unsigned int nr_tracepoints; 2220 unsigned int nr_callgraph_args = 0; 2221 const char **rec_argv; 2222 bool has_lock_stat = true; 2223 2224 for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { 2225 if (!is_valid_tracepoint(lock_tracepoints[i].name)) { 2226 pr_debug("tracepoint %s is not enabled. " 2227 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", 2228 lock_tracepoints[i].name); 2229 has_lock_stat = false; 2230 break; 2231 } 2232 } 2233 2234 if (has_lock_stat) 2235 goto setup_args; 2236 2237 for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) { 2238 if (!is_valid_tracepoint(contention_tracepoints[i].name)) { 2239 pr_err("tracepoint %s is not enabled.\n", 2240 contention_tracepoints[i].name); 2241 return 1; 2242 } 2243 } 2244 2245 nr_callgraph_args = ARRAY_SIZE(callgraph_args); 2246 2247 setup_args: 2248 rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1; 2249 2250 if (has_lock_stat) 2251 nr_tracepoints = ARRAY_SIZE(lock_tracepoints); 2252 else 2253 nr_tracepoints = ARRAY_SIZE(contention_tracepoints); 2254 2255 /* factor of 2 is for -e in front of each tracepoint */ 2256 rec_argc += 2 * nr_tracepoints; 2257 2258 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2259 if (!rec_argv) 2260 return -ENOMEM; 2261 2262 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2263 rec_argv[i] = strdup(record_args[i]); 2264 2265 for (j = 0; j < nr_tracepoints; j++) { 2266 const char *ev_name; 2267 2268 if (has_lock_stat) 2269 ev_name = strdup(lock_tracepoints[j].name); 2270 else 2271 ev_name = strdup(contention_tracepoints[j].name); 2272 2273 if (!ev_name) 2274 return -ENOMEM; 2275 2276 rec_argv[i++] = "-e"; 2277 rec_argv[i++] = ev_name; 2278 } 2279 2280 for (j = 0; j < nr_callgraph_args; j++, i++) 2281 rec_argv[i] = callgraph_args[j]; 2282 2283 for (j = 1; j < (unsigned int)argc; j++, i++) 2284 rec_argv[i] = argv[j]; 2285 2286 BUG_ON(i != rec_argc); 2287 2288 ret = cmd_record(i, rec_argv); 2289 free(rec_argv); 2290 return ret; 2291 } 2292 2293 static int parse_map_entry(const struct option *opt, const char *str, 2294 int unset __maybe_unused) 2295 { 2296 unsigned long *len = (unsigned long *)opt->value; 2297 unsigned long val; 2298 char *endptr; 2299 2300 errno = 0; 2301 val = strtoul(str, &endptr, 0); 2302 if (*endptr != '\0' || errno != 0) { 2303 pr_err("invalid BPF map length: %s\n", str); 2304 return -1; 2305 } 2306 2307 *len = val; 2308 return 0; 2309 } 2310 2311 static int parse_max_stack(const struct option *opt, const char *str, 2312 int unset __maybe_unused) 2313 { 2314 unsigned long *len = (unsigned long *)opt->value; 2315 long val; 2316 char *endptr; 2317 2318 errno = 0; 2319 val = strtol(str, &endptr, 0); 2320 if (*endptr != '\0' || errno != 0) { 2321 pr_err("invalid max stack depth: %s\n", str); 2322 return -1; 2323 } 2324 2325 if (val < 0 || val > sysctl__max_stack()) { 2326 pr_err("invalid max stack depth: %ld\n", val); 2327 return -1; 2328 } 2329 2330 *len = val; 2331 return 0; 2332 } 2333 2334 static bool add_lock_type(unsigned int flags) 2335 { 2336 unsigned int *tmp; 2337 2338 tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types)); 2339 if (tmp == NULL) 2340 return false; 2341 2342 tmp[filters.nr_types++] = flags; 2343 filters.types = tmp; 2344 return true; 2345 } 2346 2347 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str, 2348 int unset __maybe_unused) 2349 { 2350 char *s, *tmp, *tok; 2351 int ret = 0; 2352 2353 s = strdup(str); 2354 if (s == NULL) 2355 return -1; 2356 2357 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2358 unsigned int flags = get_type_flag(tok); 2359 2360 if (flags == -1U) { 2361 pr_err("Unknown lock flags: %s\n", tok); 2362 ret = -1; 2363 break; 2364 } 2365 2366 if (!add_lock_type(flags)) { 2367 ret = -1; 2368 break; 2369 } 2370 } 2371 2372 free(s); 2373 return ret; 2374 } 2375 2376 static bool add_lock_addr(unsigned long addr) 2377 { 2378 unsigned long *tmp; 2379 2380 tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs)); 2381 if (tmp == NULL) { 2382 pr_err("Memory allocation failure\n"); 2383 return false; 2384 } 2385 2386 tmp[filters.nr_addrs++] = addr; 2387 filters.addrs = tmp; 2388 return true; 2389 } 2390 2391 static bool add_lock_sym(char *name) 2392 { 2393 char **tmp; 2394 char *sym = strdup(name); 2395 2396 if (sym == NULL) { 2397 pr_err("Memory allocation failure\n"); 2398 return false; 2399 } 2400 2401 tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms)); 2402 if (tmp == NULL) { 2403 pr_err("Memory allocation failure\n"); 2404 free(sym); 2405 return false; 2406 } 2407 2408 tmp[filters.nr_syms++] = sym; 2409 filters.syms = tmp; 2410 return true; 2411 } 2412 2413 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str, 2414 int unset __maybe_unused) 2415 { 2416 char *s, *tmp, *tok; 2417 int ret = 0; 2418 u64 addr; 2419 2420 s = strdup(str); 2421 if (s == NULL) 2422 return -1; 2423 2424 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2425 char *end; 2426 2427 addr = strtoul(tok, &end, 16); 2428 if (*end == '\0') { 2429 if (!add_lock_addr(addr)) { 2430 ret = -1; 2431 break; 2432 } 2433 continue; 2434 } 2435 2436 /* 2437 * At this moment, we don't have kernel symbols. Save the symbols 2438 * in a separate list and resolve them to addresses later. 2439 */ 2440 if (!add_lock_sym(tok)) { 2441 ret = -1; 2442 break; 2443 } 2444 } 2445 2446 free(s); 2447 return ret; 2448 } 2449 2450 static int parse_call_stack(const struct option *opt __maybe_unused, const char *str, 2451 int unset __maybe_unused) 2452 { 2453 char *s, *tmp, *tok; 2454 int ret = 0; 2455 2456 s = strdup(str); 2457 if (s == NULL) 2458 return -1; 2459 2460 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2461 struct callstack_filter *entry; 2462 2463 entry = malloc(sizeof(*entry) + strlen(tok) + 1); 2464 if (entry == NULL) { 2465 pr_err("Memory allocation failure\n"); 2466 return -1; 2467 } 2468 2469 strcpy(entry->name, tok); 2470 list_add_tail(&entry->list, &callstack_filters); 2471 } 2472 2473 free(s); 2474 return ret; 2475 } 2476 2477 static int parse_output(const struct option *opt __maybe_unused, const char *str, 2478 int unset __maybe_unused) 2479 { 2480 const char **name = (const char **)opt->value; 2481 2482 if (str == NULL) 2483 return -1; 2484 2485 lock_output = fopen(str, "w"); 2486 if (lock_output == NULL) { 2487 pr_err("Cannot open %s\n", str); 2488 return -1; 2489 } 2490 2491 *name = str; 2492 return 0; 2493 } 2494 2495 static bool add_lock_cgroup(char *name) 2496 { 2497 u64 *tmp; 2498 struct cgroup *cgrp; 2499 2500 cgrp = cgroup__new(name, /*do_open=*/false); 2501 if (cgrp == NULL) { 2502 pr_err("Failed to create cgroup: %s\n", name); 2503 return false; 2504 } 2505 2506 if (read_cgroup_id(cgrp) < 0) { 2507 pr_err("Failed to read cgroup id for %s\n", name); 2508 cgroup__put(cgrp); 2509 return false; 2510 } 2511 2512 tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps)); 2513 if (tmp == NULL) { 2514 pr_err("Memory allocation failure\n"); 2515 return false; 2516 } 2517 2518 tmp[filters.nr_cgrps++] = cgrp->id; 2519 filters.cgrps = tmp; 2520 cgroup__put(cgrp); 2521 return true; 2522 } 2523 2524 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str, 2525 int unset __maybe_unused) 2526 { 2527 char *s, *tmp, *tok; 2528 int ret = 0; 2529 2530 s = strdup(str); 2531 if (s == NULL) 2532 return -1; 2533 2534 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2535 if (!add_lock_cgroup(tok)) { 2536 ret = -1; 2537 break; 2538 } 2539 } 2540 2541 free(s); 2542 return ret; 2543 } 2544 2545 int cmd_lock(int argc, const char **argv) 2546 { 2547 const struct option lock_options[] = { 2548 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2549 OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output), 2550 OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), 2551 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 2552 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 2553 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2554 "file", "vmlinux pathname"), 2555 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 2556 "file", "kallsyms pathname"), 2557 OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), 2558 OPT_END() 2559 }; 2560 2561 const struct option info_options[] = { 2562 OPT_BOOLEAN('t', "threads", &info_threads, 2563 "dump thread list in perf.data"), 2564 OPT_BOOLEAN('m', "map", &info_map, 2565 "map of lock instances (address:name table)"), 2566 OPT_PARENT(lock_options) 2567 }; 2568 2569 const struct option report_options[] = { 2570 OPT_STRING('k', "key", &sort_key, "acquired", 2571 "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2572 OPT_STRING('F', "field", &output_fields, NULL, 2573 "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2574 /* TODO: type */ 2575 OPT_BOOLEAN('c', "combine-locks", &combine_locks, 2576 "combine locks in the same class"), 2577 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2578 "show per-thread lock stats"), 2579 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2580 OPT_PARENT(lock_options) 2581 }; 2582 2583 struct option contention_options[] = { 2584 OPT_STRING('k', "key", &sort_key, "wait_total", 2585 "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"), 2586 OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait", 2587 "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"), 2588 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2589 "show per-thread lock stats"), 2590 OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"), 2591 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 2592 "System-wide collection from all CPUs"), 2593 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 2594 "List of cpus to monitor"), 2595 OPT_STRING('p', "pid", &target.pid, "pid", 2596 "Trace on existing process id"), 2597 OPT_STRING(0, "tid", &target.tid, "tid", 2598 "Trace on existing thread id (exclusive to --pid)"), 2599 OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num", 2600 "Max number of BPF map entries", parse_map_entry), 2601 OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", 2602 "Set the maximum stack depth when collecting lock contention, " 2603 "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), 2604 OPT_INTEGER(0, "stack-skip", &stack_skip, 2605 "Set the number of stack depth to skip when finding a lock caller, " 2606 "Default: " __stringify(CONTENTION_STACK_SKIP)), 2607 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2608 OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"), 2609 OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS", 2610 "Filter specific type of locks", parse_lock_type), 2611 OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES", 2612 "Filter specific address/symbol of locks", parse_lock_addr), 2613 OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", 2614 "Filter specific function in the callstack", parse_call_stack), 2615 OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"), 2616 OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", 2617 "print result in CSV format with custom separator"), 2618 OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"), 2619 OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS", 2620 "Filter specific cgroups", parse_cgroup_filter), 2621 OPT_PARENT(lock_options) 2622 }; 2623 2624 const char * const info_usage[] = { 2625 "perf lock info [<options>]", 2626 NULL 2627 }; 2628 const char *const lock_subcommands[] = { "record", "report", "script", 2629 "info", "contention", NULL }; 2630 const char *lock_usage[] = { 2631 NULL, 2632 NULL 2633 }; 2634 const char * const report_usage[] = { 2635 "perf lock report [<options>]", 2636 NULL 2637 }; 2638 const char * const contention_usage[] = { 2639 "perf lock contention [<options>]", 2640 NULL 2641 }; 2642 unsigned int i; 2643 int rc = 0; 2644 2645 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table)); 2646 if (!lockhash_table) 2647 return -ENOMEM; 2648 2649 for (i = 0; i < LOCKHASH_SIZE; i++) 2650 INIT_HLIST_HEAD(lockhash_table + i); 2651 2652 lock_output = stderr; 2653 argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, 2654 lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2655 if (!argc) 2656 usage_with_options(lock_usage, lock_options); 2657 2658 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 2659 return __cmd_record(argc, argv); 2660 } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 2661 trace_handler = &report_lock_ops; 2662 if (argc) { 2663 argc = parse_options(argc, argv, 2664 report_options, report_usage, 0); 2665 if (argc) 2666 usage_with_options(report_usage, report_options); 2667 } 2668 rc = __cmd_report(false); 2669 } else if (!strcmp(argv[0], "script")) { 2670 /* Aliased to 'perf script' */ 2671 rc = cmd_script(argc, argv); 2672 } else if (!strcmp(argv[0], "info")) { 2673 if (argc) { 2674 argc = parse_options(argc, argv, 2675 info_options, info_usage, 0); 2676 if (argc) 2677 usage_with_options(info_usage, info_options); 2678 } 2679 /* recycling report_lock_ops */ 2680 trace_handler = &report_lock_ops; 2681 rc = __cmd_report(true); 2682 } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) { 2683 trace_handler = &contention_lock_ops; 2684 sort_key = "wait_total"; 2685 output_fields = "contended,wait_total,wait_max,avg_wait"; 2686 2687 #ifndef HAVE_BPF_SKEL 2688 set_option_nobuild(contention_options, 'b', "use-bpf", 2689 "no BUILD_BPF_SKEL=1", false); 2690 #endif 2691 if (argc) { 2692 argc = parse_options(argc, argv, contention_options, 2693 contention_usage, 0); 2694 } 2695 2696 if (check_lock_contention_options(contention_options, 2697 contention_usage) < 0) 2698 return -1; 2699 2700 rc = __cmd_contention(argc, argv); 2701 } else { 2702 usage_with_options(lock_usage, lock_options); 2703 } 2704 2705 zfree(&lockhash_table); 2706 return rc; 2707 } 2708