1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include "builtin.h" 5 #include "perf.h" 6 7 #include "util/evlist.h" // for struct evsel_str_handler 8 #include "util/evsel.h" 9 #include "util/symbol.h" 10 #include "util/thread.h" 11 #include "util/header.h" 12 #include "util/target.h" 13 #include "util/callchain.h" 14 #include "util/lock-contention.h" 15 #include "util/bpf_skel/lock_data.h" 16 17 #include <subcmd/pager.h> 18 #include <subcmd/parse-options.h> 19 #include "util/trace-event.h" 20 #include "util/tracepoint.h" 21 22 #include "util/debug.h" 23 #include "util/session.h" 24 #include "util/tool.h" 25 #include "util/data.h" 26 #include "util/string2.h" 27 #include "util/map.h" 28 #include "util/util.h" 29 30 #include <sys/types.h> 31 #include <sys/prctl.h> 32 #include <semaphore.h> 33 #include <math.h> 34 #include <limits.h> 35 36 #include <linux/list.h> 37 #include <linux/hash.h> 38 #include <linux/kernel.h> 39 #include <linux/zalloc.h> 40 #include <linux/err.h> 41 #include <linux/stringify.h> 42 43 static struct perf_session *session; 44 static struct target target; 45 46 /* based on kernel/lockdep.c */ 47 #define LOCKHASH_BITS 12 48 #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) 49 50 static struct hlist_head lockhash_table[LOCKHASH_SIZE]; 51 52 #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) 53 #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) 54 55 static struct rb_root thread_stats; 56 57 static bool combine_locks; 58 static bool show_thread_stats; 59 static bool show_lock_addrs; 60 static bool use_bpf; 61 static unsigned long bpf_map_entries = 10240; 62 static int max_stack_depth = CONTENTION_STACK_DEPTH; 63 static int stack_skip = CONTENTION_STACK_SKIP; 64 static int print_nr_entries = INT_MAX / 2; 65 66 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; 67 68 static struct thread_stat *thread_stat_find(u32 tid) 69 { 70 struct rb_node *node; 71 struct thread_stat *st; 72 73 node = thread_stats.rb_node; 74 while (node) { 75 st = container_of(node, struct thread_stat, rb); 76 if (st->tid == tid) 77 return st; 78 else if (tid < st->tid) 79 node = node->rb_left; 80 else 81 node = node->rb_right; 82 } 83 84 return NULL; 85 } 86 87 static void thread_stat_insert(struct thread_stat *new) 88 { 89 struct rb_node **rb = &thread_stats.rb_node; 90 struct rb_node *parent = NULL; 91 struct thread_stat *p; 92 93 while (*rb) { 94 p = container_of(*rb, struct thread_stat, rb); 95 parent = *rb; 96 97 if (new->tid < p->tid) 98 rb = &(*rb)->rb_left; 99 else if (new->tid > p->tid) 100 rb = &(*rb)->rb_right; 101 else 102 BUG_ON("inserting invalid thread_stat\n"); 103 } 104 105 rb_link_node(&new->rb, parent, rb); 106 rb_insert_color(&new->rb, &thread_stats); 107 } 108 109 static struct thread_stat *thread_stat_findnew_after_first(u32 tid) 110 { 111 struct thread_stat *st; 112 113 st = thread_stat_find(tid); 114 if (st) 115 return st; 116 117 st = zalloc(sizeof(struct thread_stat)); 118 if (!st) { 119 pr_err("memory allocation failed\n"); 120 return NULL; 121 } 122 123 st->tid = tid; 124 INIT_LIST_HEAD(&st->seq_list); 125 126 thread_stat_insert(st); 127 128 return st; 129 } 130 131 static struct thread_stat *thread_stat_findnew_first(u32 tid); 132 static struct thread_stat *(*thread_stat_findnew)(u32 tid) = 133 thread_stat_findnew_first; 134 135 static struct thread_stat *thread_stat_findnew_first(u32 tid) 136 { 137 struct thread_stat *st; 138 139 st = zalloc(sizeof(struct thread_stat)); 140 if (!st) { 141 pr_err("memory allocation failed\n"); 142 return NULL; 143 } 144 st->tid = tid; 145 INIT_LIST_HEAD(&st->seq_list); 146 147 rb_link_node(&st->rb, NULL, &thread_stats.rb_node); 148 rb_insert_color(&st->rb, &thread_stats); 149 150 thread_stat_findnew = thread_stat_findnew_after_first; 151 return st; 152 } 153 154 /* build simple key function one is bigger than two */ 155 #define SINGLE_KEY(member) \ 156 static int lock_stat_key_ ## member(struct lock_stat *one, \ 157 struct lock_stat *two) \ 158 { \ 159 return one->member > two->member; \ 160 } 161 162 SINGLE_KEY(nr_acquired) 163 SINGLE_KEY(nr_contended) 164 SINGLE_KEY(avg_wait_time) 165 SINGLE_KEY(wait_time_total) 166 SINGLE_KEY(wait_time_max) 167 168 static int lock_stat_key_wait_time_min(struct lock_stat *one, 169 struct lock_stat *two) 170 { 171 u64 s1 = one->wait_time_min; 172 u64 s2 = two->wait_time_min; 173 if (s1 == ULLONG_MAX) 174 s1 = 0; 175 if (s2 == ULLONG_MAX) 176 s2 = 0; 177 return s1 > s2; 178 } 179 180 struct lock_key { 181 /* 182 * name: the value for specify by user 183 * this should be simpler than raw name of member 184 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total 185 */ 186 const char *name; 187 /* header: the string printed on the header line */ 188 const char *header; 189 /* len: the printing width of the field */ 190 int len; 191 /* key: a pointer to function to compare two lock stats for sorting */ 192 int (*key)(struct lock_stat*, struct lock_stat*); 193 /* print: a pointer to function to print a given lock stats */ 194 void (*print)(struct lock_key*, struct lock_stat*); 195 /* list: list entry to link this */ 196 struct list_head list; 197 }; 198 199 static void lock_stat_key_print_time(unsigned long long nsec, int len) 200 { 201 static const struct { 202 float base; 203 const char *unit; 204 } table[] = { 205 { 1e9 * 3600, "h " }, 206 { 1e9 * 60, "m " }, 207 { 1e9, "s " }, 208 { 1e6, "ms" }, 209 { 1e3, "us" }, 210 { 0, NULL }, 211 }; 212 213 for (int i = 0; table[i].unit; i++) { 214 if (nsec < table[i].base) 215 continue; 216 217 pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); 218 return; 219 } 220 221 pr_info("%*llu %s", len - 3, nsec, "ns"); 222 } 223 224 #define PRINT_KEY(member) \ 225 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 226 struct lock_stat *ls) \ 227 { \ 228 pr_info("%*llu", key->len, (unsigned long long)ls->member); \ 229 } 230 231 #define PRINT_TIME(member) \ 232 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 233 struct lock_stat *ls) \ 234 { \ 235 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ 236 } 237 238 PRINT_KEY(nr_acquired) 239 PRINT_KEY(nr_contended) 240 PRINT_TIME(avg_wait_time) 241 PRINT_TIME(wait_time_total) 242 PRINT_TIME(wait_time_max) 243 244 static void lock_stat_key_print_wait_time_min(struct lock_key *key, 245 struct lock_stat *ls) 246 { 247 u64 wait_time = ls->wait_time_min; 248 249 if (wait_time == ULLONG_MAX) 250 wait_time = 0; 251 252 lock_stat_key_print_time(wait_time, key->len); 253 } 254 255 256 static const char *sort_key = "acquired"; 257 258 static int (*compare)(struct lock_stat *, struct lock_stat *); 259 260 static struct rb_root sorted; /* place to store intermediate data */ 261 static struct rb_root result; /* place to store sorted data */ 262 263 static LIST_HEAD(lock_keys); 264 static const char *output_fields; 265 266 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \ 267 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} } 268 static struct lock_key report_keys[] = { 269 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), 270 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 271 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 272 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 273 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 274 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 275 276 /* extra comparisons much complicated should be here */ 277 { } 278 }; 279 280 static struct lock_key contention_keys[] = { 281 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 282 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 283 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 284 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 285 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 286 287 /* extra comparisons much complicated should be here */ 288 { } 289 }; 290 291 static int select_key(bool contention) 292 { 293 int i; 294 struct lock_key *keys = report_keys; 295 296 if (contention) 297 keys = contention_keys; 298 299 for (i = 0; keys[i].name; i++) { 300 if (!strcmp(keys[i].name, sort_key)) { 301 compare = keys[i].key; 302 303 /* selected key should be in the output fields */ 304 if (list_empty(&keys[i].list)) 305 list_add_tail(&keys[i].list, &lock_keys); 306 307 return 0; 308 } 309 } 310 311 pr_err("Unknown compare key: %s\n", sort_key); 312 return -1; 313 } 314 315 static int add_output_field(bool contention, char *name) 316 { 317 int i; 318 struct lock_key *keys = report_keys; 319 320 if (contention) 321 keys = contention_keys; 322 323 for (i = 0; keys[i].name; i++) { 324 if (strcmp(keys[i].name, name)) 325 continue; 326 327 /* prevent double link */ 328 if (list_empty(&keys[i].list)) 329 list_add_tail(&keys[i].list, &lock_keys); 330 331 return 0; 332 } 333 334 pr_err("Unknown output field: %s\n", name); 335 return -1; 336 } 337 338 static int setup_output_field(bool contention, const char *str) 339 { 340 char *tok, *tmp, *orig; 341 int i, ret = 0; 342 struct lock_key *keys = report_keys; 343 344 if (contention) 345 keys = contention_keys; 346 347 /* no output field given: use all of them */ 348 if (str == NULL) { 349 for (i = 0; keys[i].name; i++) 350 list_add_tail(&keys[i].list, &lock_keys); 351 return 0; 352 } 353 354 for (i = 0; keys[i].name; i++) 355 INIT_LIST_HEAD(&keys[i].list); 356 357 orig = tmp = strdup(str); 358 if (orig == NULL) 359 return -ENOMEM; 360 361 while ((tok = strsep(&tmp, ",")) != NULL){ 362 ret = add_output_field(contention, tok); 363 if (ret < 0) 364 break; 365 } 366 free(orig); 367 368 return ret; 369 } 370 371 static void combine_lock_stats(struct lock_stat *st) 372 { 373 struct rb_node **rb = &sorted.rb_node; 374 struct rb_node *parent = NULL; 375 struct lock_stat *p; 376 int ret; 377 378 while (*rb) { 379 p = container_of(*rb, struct lock_stat, rb); 380 parent = *rb; 381 382 if (st->name && p->name) 383 ret = strcmp(st->name, p->name); 384 else 385 ret = !!st->name - !!p->name; 386 387 if (ret == 0) { 388 p->nr_acquired += st->nr_acquired; 389 p->nr_contended += st->nr_contended; 390 p->wait_time_total += st->wait_time_total; 391 392 if (p->nr_contended) 393 p->avg_wait_time = p->wait_time_total / p->nr_contended; 394 395 if (p->wait_time_min > st->wait_time_min) 396 p->wait_time_min = st->wait_time_min; 397 if (p->wait_time_max < st->wait_time_max) 398 p->wait_time_max = st->wait_time_max; 399 400 p->broken |= st->broken; 401 st->combined = 1; 402 return; 403 } 404 405 if (ret < 0) 406 rb = &(*rb)->rb_left; 407 else 408 rb = &(*rb)->rb_right; 409 } 410 411 rb_link_node(&st->rb, parent, rb); 412 rb_insert_color(&st->rb, &sorted); 413 } 414 415 static void insert_to_result(struct lock_stat *st, 416 int (*bigger)(struct lock_stat *, struct lock_stat *)) 417 { 418 struct rb_node **rb = &result.rb_node; 419 struct rb_node *parent = NULL; 420 struct lock_stat *p; 421 422 if (combine_locks && st->combined) 423 return; 424 425 while (*rb) { 426 p = container_of(*rb, struct lock_stat, rb); 427 parent = *rb; 428 429 if (bigger(st, p)) 430 rb = &(*rb)->rb_left; 431 else 432 rb = &(*rb)->rb_right; 433 } 434 435 rb_link_node(&st->rb, parent, rb); 436 rb_insert_color(&st->rb, &result); 437 } 438 439 /* returns left most element of result, and erase it */ 440 static struct lock_stat *pop_from_result(void) 441 { 442 struct rb_node *node = result.rb_node; 443 444 if (!node) 445 return NULL; 446 447 while (node->rb_left) 448 node = node->rb_left; 449 450 rb_erase(node, &result); 451 return container_of(node, struct lock_stat, rb); 452 } 453 454 static struct lock_stat *lock_stat_find(u64 addr) 455 { 456 struct hlist_head *entry = lockhashentry(addr); 457 struct lock_stat *ret; 458 459 hlist_for_each_entry(ret, entry, hash_entry) { 460 if (ret->addr == addr) 461 return ret; 462 } 463 return NULL; 464 } 465 466 static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags) 467 { 468 struct hlist_head *entry = lockhashentry(addr); 469 struct lock_stat *ret, *new; 470 471 hlist_for_each_entry(ret, entry, hash_entry) { 472 if (ret->addr == addr) 473 return ret; 474 } 475 476 new = zalloc(sizeof(struct lock_stat)); 477 if (!new) 478 goto alloc_failed; 479 480 new->addr = addr; 481 new->name = strdup(name); 482 if (!new->name) { 483 free(new); 484 goto alloc_failed; 485 } 486 487 new->flags = flags; 488 new->wait_time_min = ULLONG_MAX; 489 490 hlist_add_head(&new->hash_entry, entry); 491 return new; 492 493 alloc_failed: 494 pr_err("memory allocation failed\n"); 495 return NULL; 496 } 497 498 struct trace_lock_handler { 499 /* it's used on CONFIG_LOCKDEP */ 500 int (*acquire_event)(struct evsel *evsel, 501 struct perf_sample *sample); 502 503 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 504 int (*acquired_event)(struct evsel *evsel, 505 struct perf_sample *sample); 506 507 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 508 int (*contended_event)(struct evsel *evsel, 509 struct perf_sample *sample); 510 511 /* it's used on CONFIG_LOCKDEP */ 512 int (*release_event)(struct evsel *evsel, 513 struct perf_sample *sample); 514 515 /* it's used when CONFIG_LOCKDEP is off */ 516 int (*contention_begin_event)(struct evsel *evsel, 517 struct perf_sample *sample); 518 519 /* it's used when CONFIG_LOCKDEP is off */ 520 int (*contention_end_event)(struct evsel *evsel, 521 struct perf_sample *sample); 522 }; 523 524 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) 525 { 526 struct lock_seq_stat *seq; 527 528 list_for_each_entry(seq, &ts->seq_list, list) { 529 if (seq->addr == addr) 530 return seq; 531 } 532 533 seq = zalloc(sizeof(struct lock_seq_stat)); 534 if (!seq) { 535 pr_err("memory allocation failed\n"); 536 return NULL; 537 } 538 seq->state = SEQ_STATE_UNINITIALIZED; 539 seq->addr = addr; 540 541 list_add(&seq->list, &ts->seq_list); 542 return seq; 543 } 544 545 enum broken_state { 546 BROKEN_ACQUIRE, 547 BROKEN_ACQUIRED, 548 BROKEN_CONTENDED, 549 BROKEN_RELEASE, 550 BROKEN_MAX, 551 }; 552 553 static int bad_hist[BROKEN_MAX]; 554 555 enum acquire_flags { 556 TRY_LOCK = 1, 557 READ_LOCK = 2, 558 }; 559 560 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid) 561 { 562 switch (aggr_mode) { 563 case LOCK_AGGR_ADDR: 564 *key = addr; 565 break; 566 case LOCK_AGGR_TASK: 567 *key = tid; 568 break; 569 case LOCK_AGGR_CALLER: 570 default: 571 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 572 return -EINVAL; 573 } 574 return 0; 575 } 576 577 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample); 578 579 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel, 580 struct perf_sample *sample) 581 { 582 if (aggr_mode == LOCK_AGGR_CALLER) { 583 *key = callchain_id(evsel, sample); 584 return 0; 585 } 586 return get_key_by_aggr_mode_simple(key, addr, sample->tid); 587 } 588 589 static int report_lock_acquire_event(struct evsel *evsel, 590 struct perf_sample *sample) 591 { 592 struct lock_stat *ls; 593 struct thread_stat *ts; 594 struct lock_seq_stat *seq; 595 const char *name = evsel__strval(evsel, sample, "name"); 596 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 597 int flag = evsel__intval(evsel, sample, "flags"); 598 u64 key; 599 int ret; 600 601 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 602 if (ret < 0) 603 return ret; 604 605 ls = lock_stat_findnew(key, name, 0); 606 if (!ls) 607 return -ENOMEM; 608 609 ts = thread_stat_findnew(sample->tid); 610 if (!ts) 611 return -ENOMEM; 612 613 seq = get_seq(ts, addr); 614 if (!seq) 615 return -ENOMEM; 616 617 switch (seq->state) { 618 case SEQ_STATE_UNINITIALIZED: 619 case SEQ_STATE_RELEASED: 620 if (!flag) { 621 seq->state = SEQ_STATE_ACQUIRING; 622 } else { 623 if (flag & TRY_LOCK) 624 ls->nr_trylock++; 625 if (flag & READ_LOCK) 626 ls->nr_readlock++; 627 seq->state = SEQ_STATE_READ_ACQUIRED; 628 seq->read_count = 1; 629 ls->nr_acquired++; 630 } 631 break; 632 case SEQ_STATE_READ_ACQUIRED: 633 if (flag & READ_LOCK) { 634 seq->read_count++; 635 ls->nr_acquired++; 636 goto end; 637 } else { 638 goto broken; 639 } 640 break; 641 case SEQ_STATE_ACQUIRED: 642 case SEQ_STATE_ACQUIRING: 643 case SEQ_STATE_CONTENDED: 644 broken: 645 /* broken lock sequence */ 646 if (!ls->broken) { 647 ls->broken = 1; 648 bad_hist[BROKEN_ACQUIRE]++; 649 } 650 list_del_init(&seq->list); 651 free(seq); 652 goto end; 653 default: 654 BUG_ON("Unknown state of lock sequence found!\n"); 655 break; 656 } 657 658 ls->nr_acquire++; 659 seq->prev_event_time = sample->time; 660 end: 661 return 0; 662 } 663 664 static int report_lock_acquired_event(struct evsel *evsel, 665 struct perf_sample *sample) 666 { 667 struct lock_stat *ls; 668 struct thread_stat *ts; 669 struct lock_seq_stat *seq; 670 u64 contended_term; 671 const char *name = evsel__strval(evsel, sample, "name"); 672 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 673 u64 key; 674 int ret; 675 676 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 677 if (ret < 0) 678 return ret; 679 680 ls = lock_stat_findnew(key, name, 0); 681 if (!ls) 682 return -ENOMEM; 683 684 ts = thread_stat_findnew(sample->tid); 685 if (!ts) 686 return -ENOMEM; 687 688 seq = get_seq(ts, addr); 689 if (!seq) 690 return -ENOMEM; 691 692 switch (seq->state) { 693 case SEQ_STATE_UNINITIALIZED: 694 /* orphan event, do nothing */ 695 return 0; 696 case SEQ_STATE_ACQUIRING: 697 break; 698 case SEQ_STATE_CONTENDED: 699 contended_term = sample->time - seq->prev_event_time; 700 ls->wait_time_total += contended_term; 701 if (contended_term < ls->wait_time_min) 702 ls->wait_time_min = contended_term; 703 if (ls->wait_time_max < contended_term) 704 ls->wait_time_max = contended_term; 705 break; 706 case SEQ_STATE_RELEASED: 707 case SEQ_STATE_ACQUIRED: 708 case SEQ_STATE_READ_ACQUIRED: 709 /* broken lock sequence */ 710 if (!ls->broken) { 711 ls->broken = 1; 712 bad_hist[BROKEN_ACQUIRED]++; 713 } 714 list_del_init(&seq->list); 715 free(seq); 716 goto end; 717 default: 718 BUG_ON("Unknown state of lock sequence found!\n"); 719 break; 720 } 721 722 seq->state = SEQ_STATE_ACQUIRED; 723 ls->nr_acquired++; 724 ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0; 725 seq->prev_event_time = sample->time; 726 end: 727 return 0; 728 } 729 730 static int report_lock_contended_event(struct evsel *evsel, 731 struct perf_sample *sample) 732 { 733 struct lock_stat *ls; 734 struct thread_stat *ts; 735 struct lock_seq_stat *seq; 736 const char *name = evsel__strval(evsel, sample, "name"); 737 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 738 u64 key; 739 int ret; 740 741 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 742 if (ret < 0) 743 return ret; 744 745 ls = lock_stat_findnew(key, name, 0); 746 if (!ls) 747 return -ENOMEM; 748 749 ts = thread_stat_findnew(sample->tid); 750 if (!ts) 751 return -ENOMEM; 752 753 seq = get_seq(ts, addr); 754 if (!seq) 755 return -ENOMEM; 756 757 switch (seq->state) { 758 case SEQ_STATE_UNINITIALIZED: 759 /* orphan event, do nothing */ 760 return 0; 761 case SEQ_STATE_ACQUIRING: 762 break; 763 case SEQ_STATE_RELEASED: 764 case SEQ_STATE_ACQUIRED: 765 case SEQ_STATE_READ_ACQUIRED: 766 case SEQ_STATE_CONTENDED: 767 /* broken lock sequence */ 768 if (!ls->broken) { 769 ls->broken = 1; 770 bad_hist[BROKEN_CONTENDED]++; 771 } 772 list_del_init(&seq->list); 773 free(seq); 774 goto end; 775 default: 776 BUG_ON("Unknown state of lock sequence found!\n"); 777 break; 778 } 779 780 seq->state = SEQ_STATE_CONTENDED; 781 ls->nr_contended++; 782 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; 783 seq->prev_event_time = sample->time; 784 end: 785 return 0; 786 } 787 788 static int report_lock_release_event(struct evsel *evsel, 789 struct perf_sample *sample) 790 { 791 struct lock_stat *ls; 792 struct thread_stat *ts; 793 struct lock_seq_stat *seq; 794 const char *name = evsel__strval(evsel, sample, "name"); 795 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 796 u64 key; 797 int ret; 798 799 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 800 if (ret < 0) 801 return ret; 802 803 ls = lock_stat_findnew(key, name, 0); 804 if (!ls) 805 return -ENOMEM; 806 807 ts = thread_stat_findnew(sample->tid); 808 if (!ts) 809 return -ENOMEM; 810 811 seq = get_seq(ts, addr); 812 if (!seq) 813 return -ENOMEM; 814 815 switch (seq->state) { 816 case SEQ_STATE_UNINITIALIZED: 817 goto end; 818 case SEQ_STATE_ACQUIRED: 819 break; 820 case SEQ_STATE_READ_ACQUIRED: 821 seq->read_count--; 822 BUG_ON(seq->read_count < 0); 823 if (seq->read_count) { 824 ls->nr_release++; 825 goto end; 826 } 827 break; 828 case SEQ_STATE_ACQUIRING: 829 case SEQ_STATE_CONTENDED: 830 case SEQ_STATE_RELEASED: 831 /* broken lock sequence */ 832 if (!ls->broken) { 833 ls->broken = 1; 834 bad_hist[BROKEN_RELEASE]++; 835 } 836 goto free_seq; 837 default: 838 BUG_ON("Unknown state of lock sequence found!\n"); 839 break; 840 } 841 842 ls->nr_release++; 843 free_seq: 844 list_del_init(&seq->list); 845 free(seq); 846 end: 847 return 0; 848 } 849 850 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, 851 char *buf, int size) 852 { 853 u64 offset; 854 855 if (map == NULL || sym == NULL) { 856 buf[0] = '\0'; 857 return 0; 858 } 859 860 offset = map->map_ip(map, ip) - sym->start; 861 862 if (offset) 863 return scnprintf(buf, size, "%s+%#lx", sym->name, offset); 864 else 865 return strlcpy(buf, sym->name, size); 866 } 867 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, 868 char *buf, int size) 869 { 870 struct thread *thread; 871 struct callchain_cursor *cursor = &callchain_cursor; 872 struct machine *machine = &session->machines.host; 873 struct symbol *sym; 874 int skip = 0; 875 int ret; 876 877 /* lock names will be replaced to task name later */ 878 if (show_thread_stats) 879 return -1; 880 881 thread = machine__findnew_thread(machine, -1, sample->pid); 882 if (thread == NULL) 883 return -1; 884 885 /* use caller function name from the callchain */ 886 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 887 NULL, NULL, max_stack_depth); 888 if (ret != 0) { 889 thread__put(thread); 890 return -1; 891 } 892 893 callchain_cursor_commit(cursor); 894 thread__put(thread); 895 896 while (true) { 897 struct callchain_cursor_node *node; 898 899 node = callchain_cursor_current(cursor); 900 if (node == NULL) 901 break; 902 903 /* skip first few entries - for lock functions */ 904 if (++skip <= stack_skip) 905 goto next; 906 907 sym = node->ms.sym; 908 if (sym && !machine__is_lock_function(machine, node->ip)) { 909 get_symbol_name_offset(node->ms.map, sym, node->ip, 910 buf, size); 911 return 0; 912 } 913 914 next: 915 callchain_cursor_advance(cursor); 916 } 917 return -1; 918 } 919 920 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) 921 { 922 struct callchain_cursor *cursor = &callchain_cursor; 923 struct machine *machine = &session->machines.host; 924 struct thread *thread; 925 u64 hash = 0; 926 int skip = 0; 927 int ret; 928 929 thread = machine__findnew_thread(machine, -1, sample->pid); 930 if (thread == NULL) 931 return -1; 932 933 /* use caller function name from the callchain */ 934 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 935 NULL, NULL, max_stack_depth); 936 thread__put(thread); 937 938 if (ret != 0) 939 return -1; 940 941 callchain_cursor_commit(cursor); 942 943 while (true) { 944 struct callchain_cursor_node *node; 945 946 node = callchain_cursor_current(cursor); 947 if (node == NULL) 948 break; 949 950 /* skip first few entries - for lock functions */ 951 if (++skip <= stack_skip) 952 goto next; 953 954 if (node->ms.sym && machine__is_lock_function(machine, node->ip)) 955 goto next; 956 957 hash ^= hash_long((unsigned long)node->ip, 64); 958 959 next: 960 callchain_cursor_advance(cursor); 961 } 962 return hash; 963 } 964 965 static u64 *get_callstack(struct perf_sample *sample, int max_stack) 966 { 967 u64 *callstack; 968 u64 i; 969 int c; 970 971 callstack = calloc(max_stack, sizeof(*callstack)); 972 if (callstack == NULL) 973 return NULL; 974 975 for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) { 976 u64 ip = sample->callchain->ips[i]; 977 978 if (ip >= PERF_CONTEXT_MAX) 979 continue; 980 981 callstack[c++] = ip; 982 } 983 return callstack; 984 } 985 986 static int report_lock_contention_begin_event(struct evsel *evsel, 987 struct perf_sample *sample) 988 { 989 struct lock_stat *ls; 990 struct thread_stat *ts; 991 struct lock_seq_stat *seq; 992 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 993 u64 key; 994 int ret; 995 996 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 997 if (ret < 0) 998 return ret; 999 1000 ls = lock_stat_find(key); 1001 if (!ls) { 1002 char buf[128]; 1003 const char *name = ""; 1004 unsigned int flags = evsel__intval(evsel, sample, "flags"); 1005 struct machine *machine = &session->machines.host; 1006 struct map *kmap; 1007 struct symbol *sym; 1008 1009 switch (aggr_mode) { 1010 case LOCK_AGGR_ADDR: 1011 /* make sure it loads the kernel map to find lock symbols */ 1012 map__load(machine__kernel_map(machine)); 1013 1014 sym = machine__find_kernel_symbol(machine, key, &kmap); 1015 if (sym) 1016 name = sym->name; 1017 break; 1018 case LOCK_AGGR_CALLER: 1019 name = buf; 1020 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1021 name = "Unknown"; 1022 break; 1023 case LOCK_AGGR_TASK: 1024 default: 1025 break; 1026 } 1027 1028 ls = lock_stat_findnew(key, name, flags); 1029 if (!ls) 1030 return -ENOMEM; 1031 1032 if (aggr_mode == LOCK_AGGR_CALLER && verbose) { 1033 ls->callstack = get_callstack(sample, max_stack_depth); 1034 if (ls->callstack == NULL) 1035 return -ENOMEM; 1036 } 1037 } 1038 1039 ts = thread_stat_findnew(sample->tid); 1040 if (!ts) 1041 return -ENOMEM; 1042 1043 seq = get_seq(ts, addr); 1044 if (!seq) 1045 return -ENOMEM; 1046 1047 switch (seq->state) { 1048 case SEQ_STATE_UNINITIALIZED: 1049 case SEQ_STATE_ACQUIRED: 1050 break; 1051 case SEQ_STATE_CONTENDED: 1052 /* 1053 * It can have nested contention begin with mutex spinning, 1054 * then we would use the original contention begin event and 1055 * ignore the second one. 1056 */ 1057 goto end; 1058 case SEQ_STATE_ACQUIRING: 1059 case SEQ_STATE_READ_ACQUIRED: 1060 case SEQ_STATE_RELEASED: 1061 /* broken lock sequence */ 1062 if (!ls->broken) { 1063 ls->broken = 1; 1064 bad_hist[BROKEN_CONTENDED]++; 1065 } 1066 list_del_init(&seq->list); 1067 free(seq); 1068 goto end; 1069 default: 1070 BUG_ON("Unknown state of lock sequence found!\n"); 1071 break; 1072 } 1073 1074 if (seq->state != SEQ_STATE_CONTENDED) { 1075 seq->state = SEQ_STATE_CONTENDED; 1076 seq->prev_event_time = sample->time; 1077 ls->nr_contended++; 1078 } 1079 end: 1080 return 0; 1081 } 1082 1083 static int report_lock_contention_end_event(struct evsel *evsel, 1084 struct perf_sample *sample) 1085 { 1086 struct lock_stat *ls; 1087 struct thread_stat *ts; 1088 struct lock_seq_stat *seq; 1089 u64 contended_term; 1090 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1091 u64 key; 1092 int ret; 1093 1094 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 1095 if (ret < 0) 1096 return ret; 1097 1098 ls = lock_stat_find(key); 1099 if (!ls) 1100 return 0; 1101 1102 ts = thread_stat_find(sample->tid); 1103 if (!ts) 1104 return 0; 1105 1106 seq = get_seq(ts, addr); 1107 if (!seq) 1108 return -ENOMEM; 1109 1110 switch (seq->state) { 1111 case SEQ_STATE_UNINITIALIZED: 1112 goto end; 1113 case SEQ_STATE_CONTENDED: 1114 contended_term = sample->time - seq->prev_event_time; 1115 ls->wait_time_total += contended_term; 1116 if (contended_term < ls->wait_time_min) 1117 ls->wait_time_min = contended_term; 1118 if (ls->wait_time_max < contended_term) 1119 ls->wait_time_max = contended_term; 1120 break; 1121 case SEQ_STATE_ACQUIRING: 1122 case SEQ_STATE_ACQUIRED: 1123 case SEQ_STATE_READ_ACQUIRED: 1124 case SEQ_STATE_RELEASED: 1125 /* broken lock sequence */ 1126 if (!ls->broken) { 1127 ls->broken = 1; 1128 bad_hist[BROKEN_ACQUIRED]++; 1129 } 1130 list_del_init(&seq->list); 1131 free(seq); 1132 goto end; 1133 default: 1134 BUG_ON("Unknown state of lock sequence found!\n"); 1135 break; 1136 } 1137 1138 seq->state = SEQ_STATE_ACQUIRED; 1139 ls->nr_acquired++; 1140 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; 1141 end: 1142 return 0; 1143 } 1144 1145 /* lock oriented handlers */ 1146 /* TODO: handlers for CPU oriented, thread oriented */ 1147 static struct trace_lock_handler report_lock_ops = { 1148 .acquire_event = report_lock_acquire_event, 1149 .acquired_event = report_lock_acquired_event, 1150 .contended_event = report_lock_contended_event, 1151 .release_event = report_lock_release_event, 1152 .contention_begin_event = report_lock_contention_begin_event, 1153 .contention_end_event = report_lock_contention_end_event, 1154 }; 1155 1156 static struct trace_lock_handler contention_lock_ops = { 1157 .contention_begin_event = report_lock_contention_begin_event, 1158 .contention_end_event = report_lock_contention_end_event, 1159 }; 1160 1161 1162 static struct trace_lock_handler *trace_handler; 1163 1164 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) 1165 { 1166 if (trace_handler->acquire_event) 1167 return trace_handler->acquire_event(evsel, sample); 1168 return 0; 1169 } 1170 1171 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) 1172 { 1173 if (trace_handler->acquired_event) 1174 return trace_handler->acquired_event(evsel, sample); 1175 return 0; 1176 } 1177 1178 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) 1179 { 1180 if (trace_handler->contended_event) 1181 return trace_handler->contended_event(evsel, sample); 1182 return 0; 1183 } 1184 1185 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) 1186 { 1187 if (trace_handler->release_event) 1188 return trace_handler->release_event(evsel, sample); 1189 return 0; 1190 } 1191 1192 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) 1193 { 1194 if (trace_handler->contention_begin_event) 1195 return trace_handler->contention_begin_event(evsel, sample); 1196 return 0; 1197 } 1198 1199 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) 1200 { 1201 if (trace_handler->contention_end_event) 1202 return trace_handler->contention_end_event(evsel, sample); 1203 return 0; 1204 } 1205 1206 static void print_bad_events(int bad, int total) 1207 { 1208 /* Output for debug, this have to be removed */ 1209 int i; 1210 int broken = 0; 1211 const char *name[4] = 1212 { "acquire", "acquired", "contended", "release" }; 1213 1214 for (i = 0; i < BROKEN_MAX; i++) 1215 broken += bad_hist[i]; 1216 1217 if (quiet || (broken == 0 && !verbose)) 1218 return; 1219 1220 pr_info("\n=== output for debug===\n\n"); 1221 pr_info("bad: %d, total: %d\n", bad, total); 1222 pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100); 1223 pr_info("histogram of events caused bad sequence\n"); 1224 for (i = 0; i < BROKEN_MAX; i++) 1225 pr_info(" %10s: %d\n", name[i], bad_hist[i]); 1226 } 1227 1228 /* TODO: various way to print, coloring, nano or milli sec */ 1229 static void print_result(void) 1230 { 1231 struct lock_stat *st; 1232 struct lock_key *key; 1233 char cut_name[20]; 1234 int bad, total, printed; 1235 1236 if (!quiet) { 1237 pr_info("%20s ", "Name"); 1238 list_for_each_entry(key, &lock_keys, list) 1239 pr_info("%*s ", key->len, key->header); 1240 pr_info("\n\n"); 1241 } 1242 1243 bad = total = printed = 0; 1244 while ((st = pop_from_result())) { 1245 total++; 1246 if (st->broken) 1247 bad++; 1248 if (!st->nr_acquired) 1249 continue; 1250 1251 bzero(cut_name, 20); 1252 1253 if (strlen(st->name) < 20) { 1254 /* output raw name */ 1255 const char *name = st->name; 1256 1257 if (show_thread_stats) { 1258 struct thread *t; 1259 1260 /* st->addr contains tid of thread */ 1261 t = perf_session__findnew(session, st->addr); 1262 name = thread__comm_str(t); 1263 } 1264 1265 pr_info("%20s ", name); 1266 } else { 1267 strncpy(cut_name, st->name, 16); 1268 cut_name[16] = '.'; 1269 cut_name[17] = '.'; 1270 cut_name[18] = '.'; 1271 cut_name[19] = '\0'; 1272 /* cut off name for saving output style */ 1273 pr_info("%20s ", cut_name); 1274 } 1275 1276 list_for_each_entry(key, &lock_keys, list) { 1277 key->print(key, st); 1278 pr_info(" "); 1279 } 1280 pr_info("\n"); 1281 1282 if (++printed >= print_nr_entries) 1283 break; 1284 } 1285 1286 print_bad_events(bad, total); 1287 } 1288 1289 static bool info_threads, info_map; 1290 1291 static void dump_threads(void) 1292 { 1293 struct thread_stat *st; 1294 struct rb_node *node; 1295 struct thread *t; 1296 1297 pr_info("%10s: comm\n", "Thread ID"); 1298 1299 node = rb_first(&thread_stats); 1300 while (node) { 1301 st = container_of(node, struct thread_stat, rb); 1302 t = perf_session__findnew(session, st->tid); 1303 pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); 1304 node = rb_next(node); 1305 thread__put(t); 1306 } 1307 } 1308 1309 static int compare_maps(struct lock_stat *a, struct lock_stat *b) 1310 { 1311 int ret; 1312 1313 if (a->name && b->name) 1314 ret = strcmp(a->name, b->name); 1315 else 1316 ret = !!a->name - !!b->name; 1317 1318 if (!ret) 1319 return a->addr < b->addr; 1320 else 1321 return ret < 0; 1322 } 1323 1324 static void dump_map(void) 1325 { 1326 unsigned int i; 1327 struct lock_stat *st; 1328 1329 pr_info("Address of instance: name of class\n"); 1330 for (i = 0; i < LOCKHASH_SIZE; i++) { 1331 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1332 insert_to_result(st, compare_maps); 1333 } 1334 } 1335 1336 while ((st = pop_from_result())) 1337 pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name); 1338 } 1339 1340 static int dump_info(void) 1341 { 1342 int rc = 0; 1343 1344 if (info_threads) 1345 dump_threads(); 1346 else if (info_map) 1347 dump_map(); 1348 else { 1349 rc = -1; 1350 pr_err("Unknown type of information\n"); 1351 } 1352 1353 return rc; 1354 } 1355 1356 static const struct evsel_str_handler lock_tracepoints[] = { 1357 { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ 1358 { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1359 { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1360 { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ 1361 }; 1362 1363 static const struct evsel_str_handler contention_tracepoints[] = { 1364 { "lock:contention_begin", evsel__process_contention_begin, }, 1365 { "lock:contention_end", evsel__process_contention_end, }, 1366 }; 1367 1368 static int process_event_update(struct perf_tool *tool, 1369 union perf_event *event, 1370 struct evlist **pevlist) 1371 { 1372 int ret; 1373 1374 ret = perf_event__process_event_update(tool, event, pevlist); 1375 if (ret < 0) 1376 return ret; 1377 1378 /* this can return -EEXIST since we call it for each evsel */ 1379 perf_session__set_tracepoints_handlers(session, lock_tracepoints); 1380 perf_session__set_tracepoints_handlers(session, contention_tracepoints); 1381 return 0; 1382 } 1383 1384 typedef int (*tracepoint_handler)(struct evsel *evsel, 1385 struct perf_sample *sample); 1386 1387 static int process_sample_event(struct perf_tool *tool __maybe_unused, 1388 union perf_event *event, 1389 struct perf_sample *sample, 1390 struct evsel *evsel, 1391 struct machine *machine) 1392 { 1393 int err = 0; 1394 struct thread *thread = machine__findnew_thread(machine, sample->pid, 1395 sample->tid); 1396 1397 if (thread == NULL) { 1398 pr_debug("problem processing %d event, skipping it.\n", 1399 event->header.type); 1400 return -1; 1401 } 1402 1403 if (evsel->handler != NULL) { 1404 tracepoint_handler f = evsel->handler; 1405 err = f(evsel, sample); 1406 } 1407 1408 thread__put(thread); 1409 1410 return err; 1411 } 1412 1413 static void combine_result(void) 1414 { 1415 unsigned int i; 1416 struct lock_stat *st; 1417 1418 if (!combine_locks) 1419 return; 1420 1421 for (i = 0; i < LOCKHASH_SIZE; i++) { 1422 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1423 combine_lock_stats(st); 1424 } 1425 } 1426 } 1427 1428 static void sort_result(void) 1429 { 1430 unsigned int i; 1431 struct lock_stat *st; 1432 1433 for (i = 0; i < LOCKHASH_SIZE; i++) { 1434 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1435 insert_to_result(st, compare); 1436 } 1437 } 1438 } 1439 1440 static const char *get_type_str(struct lock_stat *st) 1441 { 1442 static const struct { 1443 unsigned int flags; 1444 const char *name; 1445 } table[] = { 1446 { 0, "semaphore" }, 1447 { LCB_F_SPIN, "spinlock" }, 1448 { LCB_F_SPIN | LCB_F_READ, "rwlock:R" }, 1449 { LCB_F_SPIN | LCB_F_WRITE, "rwlock:W"}, 1450 { LCB_F_READ, "rwsem:R" }, 1451 { LCB_F_WRITE, "rwsem:W" }, 1452 { LCB_F_RT, "rtmutex" }, 1453 { LCB_F_RT | LCB_F_READ, "rwlock-rt:R" }, 1454 { LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W"}, 1455 { LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R" }, 1456 { LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W" }, 1457 { LCB_F_MUTEX, "mutex" }, 1458 { LCB_F_MUTEX | LCB_F_SPIN, "mutex" }, 1459 }; 1460 1461 for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) { 1462 if (table[i].flags == st->flags) 1463 return table[i].name; 1464 } 1465 return "unknown"; 1466 } 1467 1468 static void sort_contention_result(void) 1469 { 1470 sort_result(); 1471 } 1472 1473 static void print_contention_result(struct lock_contention *con) 1474 { 1475 struct lock_stat *st; 1476 struct lock_key *key; 1477 int bad, total, printed; 1478 1479 if (!quiet) { 1480 list_for_each_entry(key, &lock_keys, list) 1481 pr_info("%*s ", key->len, key->header); 1482 1483 switch (aggr_mode) { 1484 case LOCK_AGGR_TASK: 1485 pr_info(" %10s %s\n\n", "pid", "comm"); 1486 break; 1487 case LOCK_AGGR_CALLER: 1488 pr_info(" %10s %s\n\n", "type", "caller"); 1489 break; 1490 case LOCK_AGGR_ADDR: 1491 pr_info(" %16s %s\n\n", "address", "symbol"); 1492 break; 1493 default: 1494 break; 1495 } 1496 } 1497 1498 bad = total = printed = 0; 1499 if (use_bpf) 1500 bad = bad_hist[BROKEN_CONTENDED]; 1501 1502 while ((st = pop_from_result())) { 1503 struct thread *t; 1504 int pid; 1505 1506 total += use_bpf ? st->nr_contended : 1; 1507 if (st->broken) 1508 bad++; 1509 1510 list_for_each_entry(key, &lock_keys, list) { 1511 key->print(key, st); 1512 pr_info(" "); 1513 } 1514 1515 switch (aggr_mode) { 1516 case LOCK_AGGR_CALLER: 1517 pr_info(" %10s %s\n", get_type_str(st), st->name); 1518 break; 1519 case LOCK_AGGR_TASK: 1520 pid = st->addr; 1521 t = perf_session__findnew(session, pid); 1522 pr_info(" %10d %s\n", pid, thread__comm_str(t)); 1523 break; 1524 case LOCK_AGGR_ADDR: 1525 pr_info(" %016llx %s\n", (unsigned long long)st->addr, 1526 st->name ? : ""); 1527 break; 1528 default: 1529 break; 1530 } 1531 1532 if (aggr_mode == LOCK_AGGR_CALLER && verbose) { 1533 struct map *kmap; 1534 struct symbol *sym; 1535 char buf[128]; 1536 u64 ip; 1537 1538 for (int i = 0; i < max_stack_depth; i++) { 1539 if (!st->callstack || !st->callstack[i]) 1540 break; 1541 1542 ip = st->callstack[i]; 1543 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1544 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1545 pr_info("\t\t\t%#lx %s\n", (unsigned long)ip, buf); 1546 } 1547 } 1548 1549 if (++printed >= print_nr_entries) 1550 break; 1551 } 1552 1553 print_bad_events(bad, total); 1554 } 1555 1556 static bool force; 1557 1558 static int __cmd_report(bool display_info) 1559 { 1560 int err = -EINVAL; 1561 struct perf_tool eops = { 1562 .attr = perf_event__process_attr, 1563 .event_update = process_event_update, 1564 .sample = process_sample_event, 1565 .comm = perf_event__process_comm, 1566 .mmap = perf_event__process_mmap, 1567 .namespaces = perf_event__process_namespaces, 1568 .tracing_data = perf_event__process_tracing_data, 1569 .ordered_events = true, 1570 }; 1571 struct perf_data data = { 1572 .path = input_name, 1573 .mode = PERF_DATA_MODE_READ, 1574 .force = force, 1575 }; 1576 1577 session = perf_session__new(&data, &eops); 1578 if (IS_ERR(session)) { 1579 pr_err("Initializing perf session failed\n"); 1580 return PTR_ERR(session); 1581 } 1582 1583 /* for lock function check */ 1584 symbol_conf.sort_by_name = true; 1585 symbol__init(&session->header.env); 1586 1587 if (!data.is_pipe) { 1588 if (!perf_session__has_traces(session, "lock record")) 1589 goto out_delete; 1590 1591 if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { 1592 pr_err("Initializing perf session tracepoint handlers failed\n"); 1593 goto out_delete; 1594 } 1595 1596 if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { 1597 pr_err("Initializing perf session tracepoint handlers failed\n"); 1598 goto out_delete; 1599 } 1600 } 1601 1602 if (setup_output_field(false, output_fields)) 1603 goto out_delete; 1604 1605 if (select_key(false)) 1606 goto out_delete; 1607 1608 if (show_thread_stats) 1609 aggr_mode = LOCK_AGGR_TASK; 1610 1611 err = perf_session__process_events(session); 1612 if (err) 1613 goto out_delete; 1614 1615 setup_pager(); 1616 if (display_info) /* used for info subcommand */ 1617 err = dump_info(); 1618 else { 1619 combine_result(); 1620 sort_result(); 1621 print_result(); 1622 } 1623 1624 out_delete: 1625 perf_session__delete(session); 1626 return err; 1627 } 1628 1629 static void sighandler(int sig __maybe_unused) 1630 { 1631 } 1632 1633 static int __cmd_contention(int argc, const char **argv) 1634 { 1635 int err = -EINVAL; 1636 struct perf_tool eops = { 1637 .attr = perf_event__process_attr, 1638 .event_update = process_event_update, 1639 .sample = process_sample_event, 1640 .comm = perf_event__process_comm, 1641 .mmap = perf_event__process_mmap, 1642 .tracing_data = perf_event__process_tracing_data, 1643 .ordered_events = true, 1644 }; 1645 struct perf_data data = { 1646 .path = input_name, 1647 .mode = PERF_DATA_MODE_READ, 1648 .force = force, 1649 }; 1650 struct lock_contention con = { 1651 .target = &target, 1652 .result = &lockhash_table[0], 1653 .map_nr_entries = bpf_map_entries, 1654 .max_stack = max_stack_depth, 1655 .stack_skip = stack_skip, 1656 }; 1657 1658 session = perf_session__new(use_bpf ? NULL : &data, &eops); 1659 if (IS_ERR(session)) { 1660 pr_err("Initializing perf session failed\n"); 1661 return PTR_ERR(session); 1662 } 1663 1664 con.machine = &session->machines.host; 1665 1666 con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : 1667 show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER; 1668 1669 /* for lock function check */ 1670 symbol_conf.sort_by_name = true; 1671 symbol__init(&session->header.env); 1672 1673 if (use_bpf) { 1674 err = target__validate(&target); 1675 if (err) { 1676 char errbuf[512]; 1677 1678 target__strerror(&target, err, errbuf, 512); 1679 pr_err("%s\n", errbuf); 1680 goto out_delete; 1681 } 1682 1683 signal(SIGINT, sighandler); 1684 signal(SIGCHLD, sighandler); 1685 signal(SIGTERM, sighandler); 1686 1687 con.evlist = evlist__new(); 1688 if (con.evlist == NULL) { 1689 err = -ENOMEM; 1690 goto out_delete; 1691 } 1692 1693 err = evlist__create_maps(con.evlist, &target); 1694 if (err < 0) 1695 goto out_delete; 1696 1697 if (argc) { 1698 err = evlist__prepare_workload(con.evlist, &target, 1699 argv, false, NULL); 1700 if (err < 0) 1701 goto out_delete; 1702 } 1703 1704 if (lock_contention_prepare(&con) < 0) { 1705 pr_err("lock contention BPF setup failed\n"); 1706 goto out_delete; 1707 } 1708 } else if (!data.is_pipe) { 1709 if (!perf_session__has_traces(session, "lock record")) 1710 goto out_delete; 1711 1712 if (!evlist__find_evsel_by_str(session->evlist, 1713 "lock:contention_begin")) { 1714 pr_err("lock contention evsel not found\n"); 1715 goto out_delete; 1716 } 1717 1718 if (perf_session__set_tracepoints_handlers(session, 1719 contention_tracepoints)) { 1720 pr_err("Initializing perf session tracepoint handlers failed\n"); 1721 goto out_delete; 1722 } 1723 } 1724 1725 if (setup_output_field(true, output_fields)) 1726 goto out_delete; 1727 1728 if (select_key(true)) 1729 goto out_delete; 1730 1731 if (use_bpf) { 1732 lock_contention_start(); 1733 if (argc) 1734 evlist__start_workload(con.evlist); 1735 1736 /* wait for signal */ 1737 pause(); 1738 1739 lock_contention_stop(); 1740 lock_contention_read(&con); 1741 1742 /* abuse bad hist stats for lost entries */ 1743 bad_hist[BROKEN_CONTENDED] = con.lost; 1744 } else { 1745 err = perf_session__process_events(session); 1746 if (err) 1747 goto out_delete; 1748 } 1749 1750 setup_pager(); 1751 1752 sort_contention_result(); 1753 print_contention_result(&con); 1754 1755 out_delete: 1756 evlist__delete(con.evlist); 1757 lock_contention_finish(); 1758 perf_session__delete(session); 1759 return err; 1760 } 1761 1762 1763 static int __cmd_record(int argc, const char **argv) 1764 { 1765 const char *record_args[] = { 1766 "record", "-R", "-m", "1024", "-c", "1", "--synth", "task", 1767 }; 1768 const char *callgraph_args[] = { 1769 "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH), 1770 }; 1771 unsigned int rec_argc, i, j, ret; 1772 unsigned int nr_tracepoints; 1773 unsigned int nr_callgraph_args = 0; 1774 const char **rec_argv; 1775 bool has_lock_stat = true; 1776 1777 for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { 1778 if (!is_valid_tracepoint(lock_tracepoints[i].name)) { 1779 pr_debug("tracepoint %s is not enabled. " 1780 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", 1781 lock_tracepoints[i].name); 1782 has_lock_stat = false; 1783 break; 1784 } 1785 } 1786 1787 if (has_lock_stat) 1788 goto setup_args; 1789 1790 for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) { 1791 if (!is_valid_tracepoint(contention_tracepoints[i].name)) { 1792 pr_err("tracepoint %s is not enabled.\n", 1793 contention_tracepoints[i].name); 1794 return 1; 1795 } 1796 } 1797 1798 nr_callgraph_args = ARRAY_SIZE(callgraph_args); 1799 1800 setup_args: 1801 rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1; 1802 1803 if (has_lock_stat) 1804 nr_tracepoints = ARRAY_SIZE(lock_tracepoints); 1805 else 1806 nr_tracepoints = ARRAY_SIZE(contention_tracepoints); 1807 1808 /* factor of 2 is for -e in front of each tracepoint */ 1809 rec_argc += 2 * nr_tracepoints; 1810 1811 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1812 if (!rec_argv) 1813 return -ENOMEM; 1814 1815 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1816 rec_argv[i] = strdup(record_args[i]); 1817 1818 for (j = 0; j < nr_tracepoints; j++) { 1819 const char *ev_name; 1820 1821 if (has_lock_stat) 1822 ev_name = strdup(lock_tracepoints[j].name); 1823 else 1824 ev_name = strdup(contention_tracepoints[j].name); 1825 1826 if (!ev_name) 1827 return -ENOMEM; 1828 1829 rec_argv[i++] = "-e"; 1830 rec_argv[i++] = ev_name; 1831 } 1832 1833 for (j = 0; j < nr_callgraph_args; j++, i++) 1834 rec_argv[i] = callgraph_args[j]; 1835 1836 for (j = 1; j < (unsigned int)argc; j++, i++) 1837 rec_argv[i] = argv[j]; 1838 1839 BUG_ON(i != rec_argc); 1840 1841 ret = cmd_record(i, rec_argv); 1842 free(rec_argv); 1843 return ret; 1844 } 1845 1846 static int parse_map_entry(const struct option *opt, const char *str, 1847 int unset __maybe_unused) 1848 { 1849 unsigned long *len = (unsigned long *)opt->value; 1850 unsigned long val; 1851 char *endptr; 1852 1853 errno = 0; 1854 val = strtoul(str, &endptr, 0); 1855 if (*endptr != '\0' || errno != 0) { 1856 pr_err("invalid BPF map length: %s\n", str); 1857 return -1; 1858 } 1859 1860 *len = val; 1861 return 0; 1862 } 1863 1864 static int parse_max_stack(const struct option *opt, const char *str, 1865 int unset __maybe_unused) 1866 { 1867 unsigned long *len = (unsigned long *)opt->value; 1868 long val; 1869 char *endptr; 1870 1871 errno = 0; 1872 val = strtol(str, &endptr, 0); 1873 if (*endptr != '\0' || errno != 0) { 1874 pr_err("invalid max stack depth: %s\n", str); 1875 return -1; 1876 } 1877 1878 if (val < 0 || val > sysctl__max_stack()) { 1879 pr_err("invalid max stack depth: %ld\n", val); 1880 return -1; 1881 } 1882 1883 *len = val; 1884 return 0; 1885 } 1886 1887 int cmd_lock(int argc, const char **argv) 1888 { 1889 const struct option lock_options[] = { 1890 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1891 OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), 1892 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 1893 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 1894 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1895 "file", "vmlinux pathname"), 1896 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 1897 "file", "kallsyms pathname"), 1898 OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), 1899 OPT_END() 1900 }; 1901 1902 const struct option info_options[] = { 1903 OPT_BOOLEAN('t', "threads", &info_threads, 1904 "dump thread list in perf.data"), 1905 OPT_BOOLEAN('m', "map", &info_map, 1906 "map of lock instances (address:name table)"), 1907 OPT_PARENT(lock_options) 1908 }; 1909 1910 const struct option report_options[] = { 1911 OPT_STRING('k', "key", &sort_key, "acquired", 1912 "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 1913 OPT_STRING('F', "field", &output_fields, NULL, 1914 "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 1915 /* TODO: type */ 1916 OPT_BOOLEAN('c', "combine-locks", &combine_locks, 1917 "combine locks in the same class"), 1918 OPT_BOOLEAN('t', "threads", &show_thread_stats, 1919 "show per-thread lock stats"), 1920 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 1921 OPT_PARENT(lock_options) 1922 }; 1923 1924 struct option contention_options[] = { 1925 OPT_STRING('k', "key", &sort_key, "wait_total", 1926 "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"), 1927 OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait", 1928 "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"), 1929 OPT_BOOLEAN('t', "threads", &show_thread_stats, 1930 "show per-thread lock stats"), 1931 OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"), 1932 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1933 "System-wide collection from all CPUs"), 1934 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1935 "List of cpus to monitor"), 1936 OPT_STRING('p', "pid", &target.pid, "pid", 1937 "Trace on existing process id"), 1938 OPT_STRING(0, "tid", &target.tid, "tid", 1939 "Trace on existing thread id (exclusive to --pid)"), 1940 OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num", 1941 "Max number of BPF map entries", parse_map_entry), 1942 OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", 1943 "Set the maximum stack depth when collecting lopck contention, " 1944 "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), 1945 OPT_INTEGER(0, "stack-skip", &stack_skip, 1946 "Set the number of stack depth to skip when finding a lock caller, " 1947 "Default: " __stringify(CONTENTION_STACK_SKIP)), 1948 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 1949 OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"), 1950 OPT_PARENT(lock_options) 1951 }; 1952 1953 const char * const info_usage[] = { 1954 "perf lock info [<options>]", 1955 NULL 1956 }; 1957 const char *const lock_subcommands[] = { "record", "report", "script", 1958 "info", "contention", NULL }; 1959 const char *lock_usage[] = { 1960 NULL, 1961 NULL 1962 }; 1963 const char * const report_usage[] = { 1964 "perf lock report [<options>]", 1965 NULL 1966 }; 1967 const char * const contention_usage[] = { 1968 "perf lock contention [<options>]", 1969 NULL 1970 }; 1971 unsigned int i; 1972 int rc = 0; 1973 1974 for (i = 0; i < LOCKHASH_SIZE; i++) 1975 INIT_HLIST_HEAD(lockhash_table + i); 1976 1977 argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, 1978 lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); 1979 if (!argc) 1980 usage_with_options(lock_usage, lock_options); 1981 1982 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 1983 return __cmd_record(argc, argv); 1984 } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 1985 trace_handler = &report_lock_ops; 1986 if (argc) { 1987 argc = parse_options(argc, argv, 1988 report_options, report_usage, 0); 1989 if (argc) 1990 usage_with_options(report_usage, report_options); 1991 } 1992 rc = __cmd_report(false); 1993 } else if (!strcmp(argv[0], "script")) { 1994 /* Aliased to 'perf script' */ 1995 return cmd_script(argc, argv); 1996 } else if (!strcmp(argv[0], "info")) { 1997 if (argc) { 1998 argc = parse_options(argc, argv, 1999 info_options, info_usage, 0); 2000 if (argc) 2001 usage_with_options(info_usage, info_options); 2002 } 2003 /* recycling report_lock_ops */ 2004 trace_handler = &report_lock_ops; 2005 rc = __cmd_report(true); 2006 } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) { 2007 trace_handler = &contention_lock_ops; 2008 sort_key = "wait_total"; 2009 output_fields = "contended,wait_total,wait_max,avg_wait"; 2010 2011 #ifndef HAVE_BPF_SKEL 2012 set_option_nobuild(contention_options, 'b', "use-bpf", 2013 "no BUILD_BPF_SKEL=1", false); 2014 #endif 2015 if (argc) { 2016 argc = parse_options(argc, argv, contention_options, 2017 contention_usage, 0); 2018 } 2019 2020 if (show_thread_stats && show_lock_addrs) { 2021 pr_err("Cannot use thread and addr mode together\n"); 2022 parse_options_usage(contention_usage, contention_options, 2023 "threads", 0); 2024 parse_options_usage(NULL, contention_options, 2025 "lock-addr", 0); 2026 return -1; 2027 } 2028 2029 rc = __cmd_contention(argc, argv); 2030 } else { 2031 usage_with_options(lock_usage, lock_options); 2032 } 2033 2034 return rc; 2035 } 2036