1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include "builtin.h" 5 #include "perf.h" 6 7 #include "util/evlist.h" // for struct evsel_str_handler 8 #include "util/evsel.h" 9 #include "util/symbol.h" 10 #include "util/thread.h" 11 #include "util/header.h" 12 #include "util/target.h" 13 #include "util/callchain.h" 14 #include "util/lock-contention.h" 15 16 #include <subcmd/pager.h> 17 #include <subcmd/parse-options.h> 18 #include "util/trace-event.h" 19 #include "util/tracepoint.h" 20 21 #include "util/debug.h" 22 #include "util/session.h" 23 #include "util/tool.h" 24 #include "util/data.h" 25 #include "util/string2.h" 26 #include "util/map.h" 27 28 #include <sys/types.h> 29 #include <sys/prctl.h> 30 #include <semaphore.h> 31 #include <math.h> 32 #include <limits.h> 33 34 #include <linux/list.h> 35 #include <linux/hash.h> 36 #include <linux/kernel.h> 37 #include <linux/zalloc.h> 38 #include <linux/err.h> 39 #include <linux/stringify.h> 40 41 static struct perf_session *session; 42 static struct target target; 43 44 /* based on kernel/lockdep.c */ 45 #define LOCKHASH_BITS 12 46 #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) 47 48 static struct hlist_head lockhash_table[LOCKHASH_SIZE]; 49 50 #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) 51 #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) 52 53 static struct rb_root thread_stats; 54 55 static bool combine_locks; 56 static bool show_thread_stats; 57 static bool use_bpf; 58 static unsigned long bpf_map_entries = 10240; 59 60 static enum { 61 LOCK_AGGR_ADDR, 62 LOCK_AGGR_TASK, 63 LOCK_AGGR_CALLER, 64 } aggr_mode = LOCK_AGGR_ADDR; 65 66 static u64 sched_text_start; 67 static u64 sched_text_end; 68 static u64 lock_text_start; 69 static u64 lock_text_end; 70 71 static struct thread_stat *thread_stat_find(u32 tid) 72 { 73 struct rb_node *node; 74 struct thread_stat *st; 75 76 node = thread_stats.rb_node; 77 while (node) { 78 st = 
container_of(node, struct thread_stat, rb); 79 if (st->tid == tid) 80 return st; 81 else if (tid < st->tid) 82 node = node->rb_left; 83 else 84 node = node->rb_right; 85 } 86 87 return NULL; 88 } 89 90 static void thread_stat_insert(struct thread_stat *new) 91 { 92 struct rb_node **rb = &thread_stats.rb_node; 93 struct rb_node *parent = NULL; 94 struct thread_stat *p; 95 96 while (*rb) { 97 p = container_of(*rb, struct thread_stat, rb); 98 parent = *rb; 99 100 if (new->tid < p->tid) 101 rb = &(*rb)->rb_left; 102 else if (new->tid > p->tid) 103 rb = &(*rb)->rb_right; 104 else 105 BUG_ON("inserting invalid thread_stat\n"); 106 } 107 108 rb_link_node(&new->rb, parent, rb); 109 rb_insert_color(&new->rb, &thread_stats); 110 } 111 112 static struct thread_stat *thread_stat_findnew_after_first(u32 tid) 113 { 114 struct thread_stat *st; 115 116 st = thread_stat_find(tid); 117 if (st) 118 return st; 119 120 st = zalloc(sizeof(struct thread_stat)); 121 if (!st) { 122 pr_err("memory allocation failed\n"); 123 return NULL; 124 } 125 126 st->tid = tid; 127 INIT_LIST_HEAD(&st->seq_list); 128 129 thread_stat_insert(st); 130 131 return st; 132 } 133 134 static struct thread_stat *thread_stat_findnew_first(u32 tid); 135 static struct thread_stat *(*thread_stat_findnew)(u32 tid) = 136 thread_stat_findnew_first; 137 138 static struct thread_stat *thread_stat_findnew_first(u32 tid) 139 { 140 struct thread_stat *st; 141 142 st = zalloc(sizeof(struct thread_stat)); 143 if (!st) { 144 pr_err("memory allocation failed\n"); 145 return NULL; 146 } 147 st->tid = tid; 148 INIT_LIST_HEAD(&st->seq_list); 149 150 rb_link_node(&st->rb, NULL, &thread_stats.rb_node); 151 rb_insert_color(&st->rb, &thread_stats); 152 153 thread_stat_findnew = thread_stat_findnew_after_first; 154 return st; 155 } 156 157 /* build simple key function one is bigger than two */ 158 #define SINGLE_KEY(member) \ 159 static int lock_stat_key_ ## member(struct lock_stat *one, \ 160 struct lock_stat *two) \ 161 { \ 162 return 
one->member > two->member; \ 163 } 164 165 SINGLE_KEY(nr_acquired) 166 SINGLE_KEY(nr_contended) 167 SINGLE_KEY(avg_wait_time) 168 SINGLE_KEY(wait_time_total) 169 SINGLE_KEY(wait_time_max) 170 171 static int lock_stat_key_wait_time_min(struct lock_stat *one, 172 struct lock_stat *two) 173 { 174 u64 s1 = one->wait_time_min; 175 u64 s2 = two->wait_time_min; 176 if (s1 == ULLONG_MAX) 177 s1 = 0; 178 if (s2 == ULLONG_MAX) 179 s2 = 0; 180 return s1 > s2; 181 } 182 183 struct lock_key { 184 /* 185 * name: the value for specify by user 186 * this should be simpler than raw name of member 187 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total 188 */ 189 const char *name; 190 /* header: the string printed on the header line */ 191 const char *header; 192 /* len: the printing width of the field */ 193 int len; 194 /* key: a pointer to function to compare two lock stats for sorting */ 195 int (*key)(struct lock_stat*, struct lock_stat*); 196 /* print: a pointer to function to print a given lock stats */ 197 void (*print)(struct lock_key*, struct lock_stat*); 198 /* list: list entry to link this */ 199 struct list_head list; 200 }; 201 202 static void lock_stat_key_print_time(unsigned long long nsec, int len) 203 { 204 static const struct { 205 float base; 206 const char *unit; 207 } table[] = { 208 { 1e9 * 3600, "h " }, 209 { 1e9 * 60, "m " }, 210 { 1e9, "s " }, 211 { 1e6, "ms" }, 212 { 1e3, "us" }, 213 { 0, NULL }, 214 }; 215 216 for (int i = 0; table[i].unit; i++) { 217 if (nsec < table[i].base) 218 continue; 219 220 pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); 221 return; 222 } 223 224 pr_info("%*llu %s", len - 3, nsec, "ns"); 225 } 226 227 #define PRINT_KEY(member) \ 228 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 229 struct lock_stat *ls) \ 230 { \ 231 pr_info("%*llu", key->len, (unsigned long long)ls->member); \ 232 } 233 234 #define PRINT_TIME(member) \ 235 static void lock_stat_key_print_ ## member(struct lock_key 
*key, \ 236 struct lock_stat *ls) \ 237 { \ 238 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ 239 } 240 241 PRINT_KEY(nr_acquired) 242 PRINT_KEY(nr_contended) 243 PRINT_TIME(avg_wait_time) 244 PRINT_TIME(wait_time_total) 245 PRINT_TIME(wait_time_max) 246 247 static void lock_stat_key_print_wait_time_min(struct lock_key *key, 248 struct lock_stat *ls) 249 { 250 u64 wait_time = ls->wait_time_min; 251 252 if (wait_time == ULLONG_MAX) 253 wait_time = 0; 254 255 lock_stat_key_print_time(wait_time, key->len); 256 } 257 258 259 static const char *sort_key = "acquired"; 260 261 static int (*compare)(struct lock_stat *, struct lock_stat *); 262 263 static struct rb_root sorted; /* place to store intermediate data */ 264 static struct rb_root result; /* place to store sorted data */ 265 266 static LIST_HEAD(lock_keys); 267 static const char *output_fields; 268 269 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \ 270 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} } 271 static struct lock_key report_keys[] = { 272 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), 273 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 274 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 275 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 276 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 277 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 278 279 /* extra comparisons much complicated should be here */ 280 { } 281 }; 282 283 static struct lock_key contention_keys[] = { 284 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 285 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 286 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 287 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 288 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 289 290 /* extra comparisons much complicated should be here */ 291 { } 292 }; 293 294 static int select_key(bool 
contention) 295 { 296 int i; 297 struct lock_key *keys = report_keys; 298 299 if (contention) 300 keys = contention_keys; 301 302 for (i = 0; keys[i].name; i++) { 303 if (!strcmp(keys[i].name, sort_key)) { 304 compare = keys[i].key; 305 306 /* selected key should be in the output fields */ 307 if (list_empty(&keys[i].list)) 308 list_add_tail(&keys[i].list, &lock_keys); 309 310 return 0; 311 } 312 } 313 314 pr_err("Unknown compare key: %s\n", sort_key); 315 return -1; 316 } 317 318 static int add_output_field(bool contention, char *name) 319 { 320 int i; 321 struct lock_key *keys = report_keys; 322 323 if (contention) 324 keys = contention_keys; 325 326 for (i = 0; keys[i].name; i++) { 327 if (strcmp(keys[i].name, name)) 328 continue; 329 330 /* prevent double link */ 331 if (list_empty(&keys[i].list)) 332 list_add_tail(&keys[i].list, &lock_keys); 333 334 return 0; 335 } 336 337 pr_err("Unknown output field: %s\n", name); 338 return -1; 339 } 340 341 static int setup_output_field(bool contention, const char *str) 342 { 343 char *tok, *tmp, *orig; 344 int i, ret = 0; 345 struct lock_key *keys = report_keys; 346 347 if (contention) 348 keys = contention_keys; 349 350 /* no output field given: use all of them */ 351 if (str == NULL) { 352 for (i = 0; keys[i].name; i++) 353 list_add_tail(&keys[i].list, &lock_keys); 354 return 0; 355 } 356 357 for (i = 0; keys[i].name; i++) 358 INIT_LIST_HEAD(&keys[i].list); 359 360 orig = tmp = strdup(str); 361 if (orig == NULL) 362 return -ENOMEM; 363 364 while ((tok = strsep(&tmp, ",")) != NULL){ 365 ret = add_output_field(contention, tok); 366 if (ret < 0) 367 break; 368 } 369 free(orig); 370 371 return ret; 372 } 373 374 static void combine_lock_stats(struct lock_stat *st) 375 { 376 struct rb_node **rb = &sorted.rb_node; 377 struct rb_node *parent = NULL; 378 struct lock_stat *p; 379 int ret; 380 381 while (*rb) { 382 p = container_of(*rb, struct lock_stat, rb); 383 parent = *rb; 384 385 if (st->name && p->name) 386 ret = 
strcmp(st->name, p->name); 387 else 388 ret = !!st->name - !!p->name; 389 390 if (ret == 0) { 391 p->nr_acquired += st->nr_acquired; 392 p->nr_contended += st->nr_contended; 393 p->wait_time_total += st->wait_time_total; 394 395 if (p->nr_contended) 396 p->avg_wait_time = p->wait_time_total / p->nr_contended; 397 398 if (p->wait_time_min > st->wait_time_min) 399 p->wait_time_min = st->wait_time_min; 400 if (p->wait_time_max < st->wait_time_max) 401 p->wait_time_max = st->wait_time_max; 402 403 p->broken |= st->broken; 404 st->combined = 1; 405 return; 406 } 407 408 if (ret < 0) 409 rb = &(*rb)->rb_left; 410 else 411 rb = &(*rb)->rb_right; 412 } 413 414 rb_link_node(&st->rb, parent, rb); 415 rb_insert_color(&st->rb, &sorted); 416 } 417 418 static void insert_to_result(struct lock_stat *st, 419 int (*bigger)(struct lock_stat *, struct lock_stat *)) 420 { 421 struct rb_node **rb = &result.rb_node; 422 struct rb_node *parent = NULL; 423 struct lock_stat *p; 424 425 if (combine_locks && st->combined) 426 return; 427 428 while (*rb) { 429 p = container_of(*rb, struct lock_stat, rb); 430 parent = *rb; 431 432 if (bigger(st, p)) 433 rb = &(*rb)->rb_left; 434 else 435 rb = &(*rb)->rb_right; 436 } 437 438 rb_link_node(&st->rb, parent, rb); 439 rb_insert_color(&st->rb, &result); 440 } 441 442 /* returns left most element of result, and erase it */ 443 static struct lock_stat *pop_from_result(void) 444 { 445 struct rb_node *node = result.rb_node; 446 447 if (!node) 448 return NULL; 449 450 while (node->rb_left) 451 node = node->rb_left; 452 453 rb_erase(node, &result); 454 return container_of(node, struct lock_stat, rb); 455 } 456 457 static struct lock_stat *lock_stat_find(u64 addr) 458 { 459 struct hlist_head *entry = lockhashentry(addr); 460 struct lock_stat *ret; 461 462 hlist_for_each_entry(ret, entry, hash_entry) { 463 if (ret->addr == addr) 464 return ret; 465 } 466 return NULL; 467 } 468 469 static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int 
flags) 470 { 471 struct hlist_head *entry = lockhashentry(addr); 472 struct lock_stat *ret, *new; 473 474 hlist_for_each_entry(ret, entry, hash_entry) { 475 if (ret->addr == addr) 476 return ret; 477 } 478 479 new = zalloc(sizeof(struct lock_stat)); 480 if (!new) 481 goto alloc_failed; 482 483 new->addr = addr; 484 new->name = strdup(name); 485 if (!new->name) { 486 free(new); 487 goto alloc_failed; 488 } 489 490 new->flags = flags; 491 new->wait_time_min = ULLONG_MAX; 492 493 hlist_add_head(&new->hash_entry, entry); 494 return new; 495 496 alloc_failed: 497 pr_err("memory allocation failed\n"); 498 return NULL; 499 } 500 501 struct trace_lock_handler { 502 /* it's used on CONFIG_LOCKDEP */ 503 int (*acquire_event)(struct evsel *evsel, 504 struct perf_sample *sample); 505 506 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 507 int (*acquired_event)(struct evsel *evsel, 508 struct perf_sample *sample); 509 510 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 511 int (*contended_event)(struct evsel *evsel, 512 struct perf_sample *sample); 513 514 /* it's used on CONFIG_LOCKDEP */ 515 int (*release_event)(struct evsel *evsel, 516 struct perf_sample *sample); 517 518 /* it's used when CONFIG_LOCKDEP is off */ 519 int (*contention_begin_event)(struct evsel *evsel, 520 struct perf_sample *sample); 521 522 /* it's used when CONFIG_LOCKDEP is off */ 523 int (*contention_end_event)(struct evsel *evsel, 524 struct perf_sample *sample); 525 }; 526 527 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) 528 { 529 struct lock_seq_stat *seq; 530 531 list_for_each_entry(seq, &ts->seq_list, list) { 532 if (seq->addr == addr) 533 return seq; 534 } 535 536 seq = zalloc(sizeof(struct lock_seq_stat)); 537 if (!seq) { 538 pr_err("memory allocation failed\n"); 539 return NULL; 540 } 541 seq->state = SEQ_STATE_UNINITIALIZED; 542 seq->addr = addr; 543 544 list_add(&seq->list, &ts->seq_list); 545 return seq; 546 } 547 548 enum broken_state { 549 
BROKEN_ACQUIRE, 550 BROKEN_ACQUIRED, 551 BROKEN_CONTENDED, 552 BROKEN_RELEASE, 553 BROKEN_MAX, 554 }; 555 556 static int bad_hist[BROKEN_MAX]; 557 558 enum acquire_flags { 559 TRY_LOCK = 1, 560 READ_LOCK = 2, 561 }; 562 563 static int report_lock_acquire_event(struct evsel *evsel, 564 struct perf_sample *sample) 565 { 566 struct lock_stat *ls; 567 struct thread_stat *ts; 568 struct lock_seq_stat *seq; 569 const char *name = evsel__strval(evsel, sample, "name"); 570 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 571 int flag = evsel__intval(evsel, sample, "flags"); 572 u64 key; 573 574 switch (aggr_mode) { 575 case LOCK_AGGR_ADDR: 576 key = addr; 577 break; 578 case LOCK_AGGR_TASK: 579 key = sample->tid; 580 break; 581 case LOCK_AGGR_CALLER: 582 default: 583 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 584 return -EINVAL; 585 } 586 587 ls = lock_stat_findnew(key, name, 0); 588 if (!ls) 589 return -ENOMEM; 590 591 ts = thread_stat_findnew(sample->tid); 592 if (!ts) 593 return -ENOMEM; 594 595 seq = get_seq(ts, addr); 596 if (!seq) 597 return -ENOMEM; 598 599 switch (seq->state) { 600 case SEQ_STATE_UNINITIALIZED: 601 case SEQ_STATE_RELEASED: 602 if (!flag) { 603 seq->state = SEQ_STATE_ACQUIRING; 604 } else { 605 if (flag & TRY_LOCK) 606 ls->nr_trylock++; 607 if (flag & READ_LOCK) 608 ls->nr_readlock++; 609 seq->state = SEQ_STATE_READ_ACQUIRED; 610 seq->read_count = 1; 611 ls->nr_acquired++; 612 } 613 break; 614 case SEQ_STATE_READ_ACQUIRED: 615 if (flag & READ_LOCK) { 616 seq->read_count++; 617 ls->nr_acquired++; 618 goto end; 619 } else { 620 goto broken; 621 } 622 break; 623 case SEQ_STATE_ACQUIRED: 624 case SEQ_STATE_ACQUIRING: 625 case SEQ_STATE_CONTENDED: 626 broken: 627 /* broken lock sequence */ 628 if (!ls->broken) { 629 ls->broken = 1; 630 bad_hist[BROKEN_ACQUIRE]++; 631 } 632 list_del_init(&seq->list); 633 free(seq); 634 goto end; 635 default: 636 BUG_ON("Unknown state of lock sequence found!\n"); 637 break; 638 } 639 640 ls->nr_acquire++; 
641 seq->prev_event_time = sample->time; 642 end: 643 return 0; 644 } 645 646 static int report_lock_acquired_event(struct evsel *evsel, 647 struct perf_sample *sample) 648 { 649 struct lock_stat *ls; 650 struct thread_stat *ts; 651 struct lock_seq_stat *seq; 652 u64 contended_term; 653 const char *name = evsel__strval(evsel, sample, "name"); 654 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 655 u64 key; 656 657 switch (aggr_mode) { 658 case LOCK_AGGR_ADDR: 659 key = addr; 660 break; 661 case LOCK_AGGR_TASK: 662 key = sample->tid; 663 break; 664 case LOCK_AGGR_CALLER: 665 default: 666 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 667 return -EINVAL; 668 } 669 670 ls = lock_stat_findnew(key, name, 0); 671 if (!ls) 672 return -ENOMEM; 673 674 ts = thread_stat_findnew(sample->tid); 675 if (!ts) 676 return -ENOMEM; 677 678 seq = get_seq(ts, addr); 679 if (!seq) 680 return -ENOMEM; 681 682 switch (seq->state) { 683 case SEQ_STATE_UNINITIALIZED: 684 /* orphan event, do nothing */ 685 return 0; 686 case SEQ_STATE_ACQUIRING: 687 break; 688 case SEQ_STATE_CONTENDED: 689 contended_term = sample->time - seq->prev_event_time; 690 ls->wait_time_total += contended_term; 691 if (contended_term < ls->wait_time_min) 692 ls->wait_time_min = contended_term; 693 if (ls->wait_time_max < contended_term) 694 ls->wait_time_max = contended_term; 695 break; 696 case SEQ_STATE_RELEASED: 697 case SEQ_STATE_ACQUIRED: 698 case SEQ_STATE_READ_ACQUIRED: 699 /* broken lock sequence */ 700 if (!ls->broken) { 701 ls->broken = 1; 702 bad_hist[BROKEN_ACQUIRED]++; 703 } 704 list_del_init(&seq->list); 705 free(seq); 706 goto end; 707 default: 708 BUG_ON("Unknown state of lock sequence found!\n"); 709 break; 710 } 711 712 seq->state = SEQ_STATE_ACQUIRED; 713 ls->nr_acquired++; 714 ls->avg_wait_time = ls->nr_contended ? 
ls->wait_time_total/ls->nr_contended : 0; 715 seq->prev_event_time = sample->time; 716 end: 717 return 0; 718 } 719 720 static int report_lock_contended_event(struct evsel *evsel, 721 struct perf_sample *sample) 722 { 723 struct lock_stat *ls; 724 struct thread_stat *ts; 725 struct lock_seq_stat *seq; 726 const char *name = evsel__strval(evsel, sample, "name"); 727 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 728 u64 key; 729 730 switch (aggr_mode) { 731 case LOCK_AGGR_ADDR: 732 key = addr; 733 break; 734 case LOCK_AGGR_TASK: 735 key = sample->tid; 736 break; 737 case LOCK_AGGR_CALLER: 738 default: 739 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 740 return -EINVAL; 741 } 742 743 ls = lock_stat_findnew(key, name, 0); 744 if (!ls) 745 return -ENOMEM; 746 747 ts = thread_stat_findnew(sample->tid); 748 if (!ts) 749 return -ENOMEM; 750 751 seq = get_seq(ts, addr); 752 if (!seq) 753 return -ENOMEM; 754 755 switch (seq->state) { 756 case SEQ_STATE_UNINITIALIZED: 757 /* orphan event, do nothing */ 758 return 0; 759 case SEQ_STATE_ACQUIRING: 760 break; 761 case SEQ_STATE_RELEASED: 762 case SEQ_STATE_ACQUIRED: 763 case SEQ_STATE_READ_ACQUIRED: 764 case SEQ_STATE_CONTENDED: 765 /* broken lock sequence */ 766 if (!ls->broken) { 767 ls->broken = 1; 768 bad_hist[BROKEN_CONTENDED]++; 769 } 770 list_del_init(&seq->list); 771 free(seq); 772 goto end; 773 default: 774 BUG_ON("Unknown state of lock sequence found!\n"); 775 break; 776 } 777 778 seq->state = SEQ_STATE_CONTENDED; 779 ls->nr_contended++; 780 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; 781 seq->prev_event_time = sample->time; 782 end: 783 return 0; 784 } 785 786 static int report_lock_release_event(struct evsel *evsel, 787 struct perf_sample *sample) 788 { 789 struct lock_stat *ls; 790 struct thread_stat *ts; 791 struct lock_seq_stat *seq; 792 const char *name = evsel__strval(evsel, sample, "name"); 793 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 794 u64 key; 795 796 switch 
(aggr_mode) { 797 case LOCK_AGGR_ADDR: 798 key = addr; 799 break; 800 case LOCK_AGGR_TASK: 801 key = sample->tid; 802 break; 803 case LOCK_AGGR_CALLER: 804 default: 805 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 806 return -EINVAL; 807 } 808 809 ls = lock_stat_findnew(key, name, 0); 810 if (!ls) 811 return -ENOMEM; 812 813 ts = thread_stat_findnew(sample->tid); 814 if (!ts) 815 return -ENOMEM; 816 817 seq = get_seq(ts, addr); 818 if (!seq) 819 return -ENOMEM; 820 821 switch (seq->state) { 822 case SEQ_STATE_UNINITIALIZED: 823 goto end; 824 case SEQ_STATE_ACQUIRED: 825 break; 826 case SEQ_STATE_READ_ACQUIRED: 827 seq->read_count--; 828 BUG_ON(seq->read_count < 0); 829 if (seq->read_count) { 830 ls->nr_release++; 831 goto end; 832 } 833 break; 834 case SEQ_STATE_ACQUIRING: 835 case SEQ_STATE_CONTENDED: 836 case SEQ_STATE_RELEASED: 837 /* broken lock sequence */ 838 if (!ls->broken) { 839 ls->broken = 1; 840 bad_hist[BROKEN_RELEASE]++; 841 } 842 goto free_seq; 843 default: 844 BUG_ON("Unknown state of lock sequence found!\n"); 845 break; 846 } 847 848 ls->nr_release++; 849 free_seq: 850 list_del_init(&seq->list); 851 free(seq); 852 end: 853 return 0; 854 } 855 856 bool is_lock_function(struct machine *machine, u64 addr) 857 { 858 if (!sched_text_start) { 859 struct map *kmap; 860 struct symbol *sym; 861 862 sym = machine__find_kernel_symbol_by_name(machine, 863 "__sched_text_start", 864 &kmap); 865 if (!sym) { 866 /* to avoid retry */ 867 sched_text_start = 1; 868 return false; 869 } 870 871 sched_text_start = kmap->unmap_ip(kmap, sym->start); 872 873 /* should not fail from here */ 874 sym = machine__find_kernel_symbol_by_name(machine, 875 "__sched_text_end", 876 &kmap); 877 sched_text_end = kmap->unmap_ip(kmap, sym->start); 878 879 sym = machine__find_kernel_symbol_by_name(machine, 880 "__lock_text_start", 881 &kmap); 882 lock_text_start = kmap->unmap_ip(kmap, sym->start); 883 884 sym = machine__find_kernel_symbol_by_name(machine, 885 "__lock_text_end", 
886 &kmap); 887 lock_text_end = kmap->unmap_ip(kmap, sym->start); 888 } 889 890 /* failed to get kernel symbols */ 891 if (sched_text_start == 1) 892 return false; 893 894 /* mutex and rwsem functions are in sched text section */ 895 if (sched_text_start <= addr && addr < sched_text_end) 896 return true; 897 898 /* spinlock functions are in lock text section */ 899 if (lock_text_start <= addr && addr < lock_text_end) 900 return true; 901 902 return false; 903 } 904 905 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, 906 char *buf, int size) 907 { 908 struct thread *thread; 909 struct callchain_cursor *cursor = &callchain_cursor; 910 struct machine *machine = &session->machines.host; 911 struct symbol *sym; 912 int skip = 0; 913 int ret; 914 915 /* lock names will be replaced to task name later */ 916 if (show_thread_stats) 917 return -1; 918 919 thread = machine__findnew_thread(machine, -1, sample->pid); 920 if (thread == NULL) 921 return -1; 922 923 /* use caller function name from the callchain */ 924 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 925 NULL, NULL, CONTENTION_STACK_DEPTH); 926 if (ret != 0) { 927 thread__put(thread); 928 return -1; 929 } 930 931 callchain_cursor_commit(cursor); 932 thread__put(thread); 933 934 while (true) { 935 struct callchain_cursor_node *node; 936 937 node = callchain_cursor_current(cursor); 938 if (node == NULL) 939 break; 940 941 /* skip first few entries - for lock functions */ 942 if (++skip <= CONTENTION_STACK_SKIP) 943 goto next; 944 945 sym = node->ms.sym; 946 if (sym && !is_lock_function(machine, node->ip)) { 947 struct map *map = node->ms.map; 948 u64 offset; 949 950 offset = map->map_ip(map, node->ip) - sym->start; 951 952 if (offset) 953 scnprintf(buf, size, "%s+%#lx", sym->name, offset); 954 else 955 strlcpy(buf, sym->name, size); 956 return 0; 957 } 958 959 next: 960 callchain_cursor_advance(cursor); 961 } 962 return -1; 963 } 964 965 static u64 callchain_id(struct 
evsel *evsel, struct perf_sample *sample) 966 { 967 struct callchain_cursor *cursor = &callchain_cursor; 968 struct machine *machine = &session->machines.host; 969 struct thread *thread; 970 u64 hash = 0; 971 int skip = 0; 972 int ret; 973 974 thread = machine__findnew_thread(machine, -1, sample->pid); 975 if (thread == NULL) 976 return -1; 977 978 /* use caller function name from the callchain */ 979 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 980 NULL, NULL, CONTENTION_STACK_DEPTH); 981 thread__put(thread); 982 983 if (ret != 0) 984 return -1; 985 986 callchain_cursor_commit(cursor); 987 988 while (true) { 989 struct callchain_cursor_node *node; 990 991 node = callchain_cursor_current(cursor); 992 if (node == NULL) 993 break; 994 995 /* skip first few entries - for lock functions */ 996 if (++skip <= CONTENTION_STACK_SKIP) 997 goto next; 998 999 if (node->ms.sym && is_lock_function(machine, node->ip)) 1000 goto next; 1001 1002 hash ^= hash_long((unsigned long)node->ip, 64); 1003 1004 next: 1005 callchain_cursor_advance(cursor); 1006 } 1007 return hash; 1008 } 1009 1010 static int report_lock_contention_begin_event(struct evsel *evsel, 1011 struct perf_sample *sample) 1012 { 1013 struct lock_stat *ls; 1014 struct thread_stat *ts; 1015 struct lock_seq_stat *seq; 1016 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1017 u64 key; 1018 1019 switch (aggr_mode) { 1020 case LOCK_AGGR_ADDR: 1021 key = addr; 1022 break; 1023 case LOCK_AGGR_TASK: 1024 key = sample->tid; 1025 break; 1026 case LOCK_AGGR_CALLER: 1027 key = callchain_id(evsel, sample); 1028 break; 1029 default: 1030 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 1031 return -EINVAL; 1032 } 1033 1034 ls = lock_stat_find(key); 1035 if (!ls) { 1036 char buf[128]; 1037 const char *caller = buf; 1038 unsigned int flags = evsel__intval(evsel, sample, "flags"); 1039 1040 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1041 caller = "Unknown"; 1042 1043 ls = 
lock_stat_findnew(key, caller, flags); 1044 if (!ls) 1045 return -ENOMEM; 1046 } 1047 1048 ts = thread_stat_findnew(sample->tid); 1049 if (!ts) 1050 return -ENOMEM; 1051 1052 seq = get_seq(ts, addr); 1053 if (!seq) 1054 return -ENOMEM; 1055 1056 switch (seq->state) { 1057 case SEQ_STATE_UNINITIALIZED: 1058 case SEQ_STATE_ACQUIRED: 1059 break; 1060 case SEQ_STATE_CONTENDED: 1061 /* 1062 * It can have nested contention begin with mutex spinning, 1063 * then we would use the original contention begin event and 1064 * ignore the second one. 1065 */ 1066 goto end; 1067 case SEQ_STATE_ACQUIRING: 1068 case SEQ_STATE_READ_ACQUIRED: 1069 case SEQ_STATE_RELEASED: 1070 /* broken lock sequence */ 1071 if (!ls->broken) { 1072 ls->broken = 1; 1073 bad_hist[BROKEN_CONTENDED]++; 1074 } 1075 list_del_init(&seq->list); 1076 free(seq); 1077 goto end; 1078 default: 1079 BUG_ON("Unknown state of lock sequence found!\n"); 1080 break; 1081 } 1082 1083 if (seq->state != SEQ_STATE_CONTENDED) { 1084 seq->state = SEQ_STATE_CONTENDED; 1085 seq->prev_event_time = sample->time; 1086 ls->nr_contended++; 1087 } 1088 end: 1089 return 0; 1090 } 1091 1092 static int report_lock_contention_end_event(struct evsel *evsel, 1093 struct perf_sample *sample) 1094 { 1095 struct lock_stat *ls; 1096 struct thread_stat *ts; 1097 struct lock_seq_stat *seq; 1098 u64 contended_term; 1099 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1100 u64 key; 1101 1102 switch (aggr_mode) { 1103 case LOCK_AGGR_ADDR: 1104 key = addr; 1105 break; 1106 case LOCK_AGGR_TASK: 1107 key = sample->tid; 1108 break; 1109 case LOCK_AGGR_CALLER: 1110 key = callchain_id(evsel, sample); 1111 break; 1112 default: 1113 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 1114 return -EINVAL; 1115 } 1116 1117 ls = lock_stat_find(key); 1118 if (!ls) 1119 return 0; 1120 1121 ts = thread_stat_find(sample->tid); 1122 if (!ts) 1123 return 0; 1124 1125 seq = get_seq(ts, addr); 1126 if (!seq) 1127 return -ENOMEM; 1128 1129 switch (seq->state) 
{ 1130 case SEQ_STATE_UNINITIALIZED: 1131 goto end; 1132 case SEQ_STATE_CONTENDED: 1133 contended_term = sample->time - seq->prev_event_time; 1134 ls->wait_time_total += contended_term; 1135 if (contended_term < ls->wait_time_min) 1136 ls->wait_time_min = contended_term; 1137 if (ls->wait_time_max < contended_term) 1138 ls->wait_time_max = contended_term; 1139 break; 1140 case SEQ_STATE_ACQUIRING: 1141 case SEQ_STATE_ACQUIRED: 1142 case SEQ_STATE_READ_ACQUIRED: 1143 case SEQ_STATE_RELEASED: 1144 /* broken lock sequence */ 1145 if (!ls->broken) { 1146 ls->broken = 1; 1147 bad_hist[BROKEN_ACQUIRED]++; 1148 } 1149 list_del_init(&seq->list); 1150 free(seq); 1151 goto end; 1152 default: 1153 BUG_ON("Unknown state of lock sequence found!\n"); 1154 break; 1155 } 1156 1157 seq->state = SEQ_STATE_ACQUIRED; 1158 ls->nr_acquired++; 1159 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; 1160 end: 1161 return 0; 1162 } 1163 1164 /* lock oriented handlers */ 1165 /* TODO: handlers for CPU oriented, thread oriented */ 1166 static struct trace_lock_handler report_lock_ops = { 1167 .acquire_event = report_lock_acquire_event, 1168 .acquired_event = report_lock_acquired_event, 1169 .contended_event = report_lock_contended_event, 1170 .release_event = report_lock_release_event, 1171 .contention_begin_event = report_lock_contention_begin_event, 1172 .contention_end_event = report_lock_contention_end_event, 1173 }; 1174 1175 static struct trace_lock_handler contention_lock_ops = { 1176 .contention_begin_event = report_lock_contention_begin_event, 1177 .contention_end_event = report_lock_contention_end_event, 1178 }; 1179 1180 1181 static struct trace_lock_handler *trace_handler; 1182 1183 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) 1184 { 1185 if (trace_handler->acquire_event) 1186 return trace_handler->acquire_event(evsel, sample); 1187 return 0; 1188 } 1189 1190 static int evsel__process_lock_acquired(struct evsel *evsel, struct 
perf_sample *sample) 1191 { 1192 if (trace_handler->acquired_event) 1193 return trace_handler->acquired_event(evsel, sample); 1194 return 0; 1195 } 1196 1197 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) 1198 { 1199 if (trace_handler->contended_event) 1200 return trace_handler->contended_event(evsel, sample); 1201 return 0; 1202 } 1203 1204 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) 1205 { 1206 if (trace_handler->release_event) 1207 return trace_handler->release_event(evsel, sample); 1208 return 0; 1209 } 1210 1211 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) 1212 { 1213 if (trace_handler->contention_begin_event) 1214 return trace_handler->contention_begin_event(evsel, sample); 1215 return 0; 1216 } 1217 1218 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) 1219 { 1220 if (trace_handler->contention_end_event) 1221 return trace_handler->contention_end_event(evsel, sample); 1222 return 0; 1223 } 1224 1225 static void print_bad_events(int bad, int total) 1226 { 1227 /* Output for debug, this have to be removed */ 1228 int i; 1229 int broken = 0; 1230 const char *name[4] = 1231 { "acquire", "acquired", "contended", "release" }; 1232 1233 for (i = 0; i < BROKEN_MAX; i++) 1234 broken += bad_hist[i]; 1235 1236 if (broken == 0 && !verbose) 1237 return; 1238 1239 pr_info("\n=== output for debug===\n\n"); 1240 pr_info("bad: %d, total: %d\n", bad, total); 1241 pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100); 1242 pr_info("histogram of events caused bad sequence\n"); 1243 for (i = 0; i < BROKEN_MAX; i++) 1244 pr_info(" %10s: %d\n", name[i], bad_hist[i]); 1245 } 1246 1247 /* TODO: various way to print, coloring, nano or milli sec */ 1248 static void print_result(void) 1249 { 1250 struct lock_stat *st; 1251 struct lock_key *key; 1252 char cut_name[20]; 1253 int bad, total; 1254 1255 
pr_info("%20s ", "Name"); 1256 list_for_each_entry(key, &lock_keys, list) 1257 pr_info("%*s ", key->len, key->header); 1258 pr_info("\n\n"); 1259 1260 bad = total = 0; 1261 while ((st = pop_from_result())) { 1262 total++; 1263 if (st->broken) 1264 bad++; 1265 if (!st->nr_acquired) 1266 continue; 1267 1268 bzero(cut_name, 20); 1269 1270 if (strlen(st->name) < 20) { 1271 /* output raw name */ 1272 const char *name = st->name; 1273 1274 if (show_thread_stats) { 1275 struct thread *t; 1276 1277 /* st->addr contains tid of thread */ 1278 t = perf_session__findnew(session, st->addr); 1279 name = thread__comm_str(t); 1280 } 1281 1282 pr_info("%20s ", name); 1283 } else { 1284 strncpy(cut_name, st->name, 16); 1285 cut_name[16] = '.'; 1286 cut_name[17] = '.'; 1287 cut_name[18] = '.'; 1288 cut_name[19] = '\0'; 1289 /* cut off name for saving output style */ 1290 pr_info("%20s ", cut_name); 1291 } 1292 1293 list_for_each_entry(key, &lock_keys, list) { 1294 key->print(key, st); 1295 pr_info(" "); 1296 } 1297 pr_info("\n"); 1298 } 1299 1300 print_bad_events(bad, total); 1301 } 1302 1303 static bool info_threads, info_map; 1304 1305 static void dump_threads(void) 1306 { 1307 struct thread_stat *st; 1308 struct rb_node *node; 1309 struct thread *t; 1310 1311 pr_info("%10s: comm\n", "Thread ID"); 1312 1313 node = rb_first(&thread_stats); 1314 while (node) { 1315 st = container_of(node, struct thread_stat, rb); 1316 t = perf_session__findnew(session, st->tid); 1317 pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); 1318 node = rb_next(node); 1319 thread__put(t); 1320 } 1321 } 1322 1323 static int compare_maps(struct lock_stat *a, struct lock_stat *b) 1324 { 1325 int ret; 1326 1327 if (a->name && b->name) 1328 ret = strcmp(a->name, b->name); 1329 else 1330 ret = !!a->name - !!b->name; 1331 1332 if (!ret) 1333 return a->addr < b->addr; 1334 else 1335 return ret < 0; 1336 } 1337 1338 static void dump_map(void) 1339 { 1340 unsigned int i; 1341 struct lock_stat *st; 1342 1343 
pr_info("Address of instance: name of class\n"); 1344 for (i = 0; i < LOCKHASH_SIZE; i++) { 1345 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1346 insert_to_result(st, compare_maps); 1347 } 1348 } 1349 1350 while ((st = pop_from_result())) 1351 pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name); 1352 } 1353 1354 static int dump_info(void) 1355 { 1356 int rc = 0; 1357 1358 if (info_threads) 1359 dump_threads(); 1360 else if (info_map) 1361 dump_map(); 1362 else { 1363 rc = -1; 1364 pr_err("Unknown type of information\n"); 1365 } 1366 1367 return rc; 1368 } 1369 1370 typedef int (*tracepoint_handler)(struct evsel *evsel, 1371 struct perf_sample *sample); 1372 1373 static int process_sample_event(struct perf_tool *tool __maybe_unused, 1374 union perf_event *event, 1375 struct perf_sample *sample, 1376 struct evsel *evsel, 1377 struct machine *machine) 1378 { 1379 int err = 0; 1380 struct thread *thread = machine__findnew_thread(machine, sample->pid, 1381 sample->tid); 1382 1383 if (thread == NULL) { 1384 pr_debug("problem processing %d event, skipping it.\n", 1385 event->header.type); 1386 return -1; 1387 } 1388 1389 if (evsel->handler != NULL) { 1390 tracepoint_handler f = evsel->handler; 1391 err = f(evsel, sample); 1392 } 1393 1394 thread__put(thread); 1395 1396 return err; 1397 } 1398 1399 static void combine_result(void) 1400 { 1401 unsigned int i; 1402 struct lock_stat *st; 1403 1404 if (!combine_locks) 1405 return; 1406 1407 for (i = 0; i < LOCKHASH_SIZE; i++) { 1408 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1409 combine_lock_stats(st); 1410 } 1411 } 1412 } 1413 1414 static void sort_result(void) 1415 { 1416 unsigned int i; 1417 struct lock_stat *st; 1418 1419 for (i = 0; i < LOCKHASH_SIZE; i++) { 1420 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1421 insert_to_result(st, compare); 1422 } 1423 } 1424 } 1425 1426 static const char *get_type_str(struct lock_stat *st) 1427 { 1428 static const struct { 1429 
		/* continuation of get_type_str(): flags -> name lookup table */
		unsigned int flags;
		const char *name;
	} table[] = {
		{ 0,				"semaphore" },
		{ LCB_F_SPIN,			"spinlock" },
		{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R" },
		{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W"},
		{ LCB_F_READ,			"rwsem:R" },
		{ LCB_F_WRITE,			"rwsem:W" },
		{ LCB_F_RT,			"rtmutex" },
		{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R" },
		{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W"},
		{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R" },
		{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W" },
		{ LCB_F_MUTEX,			"mutex" },
		{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex" },
	};

	/* exact flag match only; unrecognized combinations fall through */
	for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) {
		if (table[i].flags == st->flags)
			return table[i].name;
	}
	return "unknown";
}

/* contention output currently shares the generic sort path */
static void sort_contention_result(void)
{
	sort_result();
}

/*
 * Print the table for 'perf lock contention': one header line built
 * from the selected output keys, then one row per lock/caller (or per
 * thread with -t).  Continues in the next chunk.
 */
static void print_contention_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	int bad, total;

	list_for_each_entry(key, &lock_keys, list)
		pr_info("%*s ", key->len, key->header);

	if (show_thread_stats)
		pr_info(" %10s %s\n\n", "pid", "comm");
	else
		pr_info(" %10s %s\n\n", "type", "caller");

	bad = total = 0;
	/* in BPF mode, lost map entries were accounted as BROKEN_CONTENDED */
	if (use_bpf)
		bad = bad_hist[BROKEN_CONTENDED];

	while ((st = pop_from_result())) {
		total += use_bpf ?
			/*
			 * continuation of print_contention_result(): BPF mode
			 * aggregates counts per entry, trace mode counts rows
			 */
			st->nr_contended : 1;
		if (st->broken)
			bad++;

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}

		if (show_thread_stats) {
			struct thread *t;
			int pid = st->addr;

			/* st->addr contains tid of thread */
			t = perf_session__findnew(session, pid);
			/*
			 * NOTE(review): the thread returned here is never
			 * put back -- confirm whether a thread__put() is
			 * owed on this path.
			 */
			pr_info(" %10d %s\n", pid, thread__comm_str(t));
			continue;
		}

		pr_info(" %10s %s\n", get_type_str(st), st->name);
	}

	print_bad_events(bad, total);
}

/* tracepoints used by 'perf lock report' (lockdep/lockstat based) */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};

/* lighter-weight tracepoints used by 'perf lock contention' */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end", evsel__process_contention_end, },
};

static bool force;

/*
 * Shared driver for 'perf lock report' (display_info == false) and
 * 'perf lock info' (display_info == true): open the perf.data file,
 * attach tracepoint handlers, replay events, then print.
 * Continues in the next chunk.
 */
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample = process_sample_event,
		.comm = perf_event__process_comm,
		.mmap = perf_event__process_mmap,
		.namespaces = perf_event__process_namespaces,
		.ordered_events = true,
	};
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (!perf_session__has_traces(session,
					/* continuation of __cmd_report() */
					"lock record"))
		goto out_delete;

	/* both tracepoint sets are registered; only one will have events */
	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

/* empty handler: only used so pause() below returns on SIGINT etc. */
static void sighandler(int sig __maybe_unused)
{
}

/*
 * Driver for 'perf lock contention'.  Two modes:
 *  - BPF mode (-b): attach a BPF program live, optionally run a
 *    workload, and read the aggregated map afterwards;
 *  - replay mode: process contention tracepoints from perf.data.
 * Continues in the following chunks.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample = process_sample_event,
		.comm = perf_event__process_comm,
		.mmap = perf_event__process_mmap,
		.ordered_events = true,
	};
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.result = &lockhash_table[0],
		.map_nr_entries = bpf_map_entries,
	};

	/* BPF mode needs no input file, hence the NULL data */
	session = perf_session__new(use_bpf ?
				    /* continuation of __cmd_contention() */
				    NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* let pause() below wake up on these signals */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.machine = &session->machines.host;

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		/* remaining argv is a workload to run under measurement */
		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		/* the file must have been recorded with contention events */
		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;
	else
		aggr_mode = LOCK_AGGR_CALLER;

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();
1690 lock_contention_stop(); 1691 lock_contention_read(&con); 1692 1693 /* abuse bad hist stats for lost entries */ 1694 bad_hist[BROKEN_CONTENDED] = con.lost; 1695 } else { 1696 err = perf_session__process_events(session); 1697 if (err) 1698 goto out_delete; 1699 } 1700 1701 setup_pager(); 1702 1703 sort_contention_result(); 1704 print_contention_result(); 1705 1706 out_delete: 1707 evlist__delete(con.evlist); 1708 lock_contention_finish(); 1709 perf_session__delete(session); 1710 return err; 1711 } 1712 1713 1714 static int __cmd_record(int argc, const char **argv) 1715 { 1716 const char *record_args[] = { 1717 "record", "-R", "-m", "1024", "-c", "1", "--synth", "task", 1718 }; 1719 const char *callgraph_args[] = { 1720 "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH), 1721 }; 1722 unsigned int rec_argc, i, j, ret; 1723 unsigned int nr_tracepoints; 1724 unsigned int nr_callgraph_args = 0; 1725 const char **rec_argv; 1726 bool has_lock_stat = true; 1727 1728 for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { 1729 if (!is_valid_tracepoint(lock_tracepoints[i].name)) { 1730 pr_debug("tracepoint %s is not enabled. 
" 1731 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", 1732 lock_tracepoints[i].name); 1733 has_lock_stat = false; 1734 break; 1735 } 1736 } 1737 1738 if (has_lock_stat) 1739 goto setup_args; 1740 1741 for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) { 1742 if (!is_valid_tracepoint(contention_tracepoints[i].name)) { 1743 pr_err("tracepoint %s is not enabled.\n", 1744 contention_tracepoints[i].name); 1745 return 1; 1746 } 1747 } 1748 1749 nr_callgraph_args = ARRAY_SIZE(callgraph_args); 1750 1751 setup_args: 1752 rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1; 1753 1754 if (has_lock_stat) 1755 nr_tracepoints = ARRAY_SIZE(lock_tracepoints); 1756 else 1757 nr_tracepoints = ARRAY_SIZE(contention_tracepoints); 1758 1759 /* factor of 2 is for -e in front of each tracepoint */ 1760 rec_argc += 2 * nr_tracepoints; 1761 1762 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1763 if (!rec_argv) 1764 return -ENOMEM; 1765 1766 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1767 rec_argv[i] = strdup(record_args[i]); 1768 1769 for (j = 0; j < nr_tracepoints; j++) { 1770 const char *ev_name; 1771 1772 if (has_lock_stat) 1773 ev_name = strdup(lock_tracepoints[j].name); 1774 else 1775 ev_name = strdup(contention_tracepoints[j].name); 1776 1777 if (!ev_name) 1778 return -ENOMEM; 1779 1780 rec_argv[i++] = "-e"; 1781 rec_argv[i++] = ev_name; 1782 } 1783 1784 for (j = 0; j < nr_callgraph_args; j++, i++) 1785 rec_argv[i] = callgraph_args[j]; 1786 1787 for (j = 1; j < (unsigned int)argc; j++, i++) 1788 rec_argv[i] = argv[j]; 1789 1790 BUG_ON(i != rec_argc); 1791 1792 ret = cmd_record(i, rec_argv); 1793 free(rec_argv); 1794 return ret; 1795 } 1796 1797 static int parse_map_entry(const struct option *opt, const char *str, 1798 int unset __maybe_unused) 1799 { 1800 unsigned long *len = (unsigned long *)opt->value; 1801 unsigned long val; 1802 char *endptr; 1803 1804 errno = 0; 1805 val = strtoul(str, &endptr, 0); 1806 if (*endptr != '\0' || errno != 0) { 1807 
		/* continuation of parse_map_entry(): reject trailing junk/ERANGE */
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

/*
 * Top-level entry for 'perf lock': parse the subcommand (record /
 * report / script / info / contention) and dispatch.  Continues in the
 * next chunk.
 */
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_PARENT(lock_options)
	};

	/* non-const: 'b' may be disabled below when BPF skels are absent */
	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	/* strlen > 2 allows abbreviated subcommands, e.g. "rec", "rep" */
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		return cmd_script(argc, argv);
	/* continuation of cmd_lock(): remaining subcommand dispatch */
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		/* contention uses different defaults than report */
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		/* -b requires the BPF skeleton built into perf */
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return rc;
}