1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include "builtin.h" 5 #include "perf.h" 6 7 #include "util/evlist.h" // for struct evsel_str_handler 8 #include "util/evsel.h" 9 #include "util/symbol.h" 10 #include "util/thread.h" 11 #include "util/header.h" 12 #include "util/target.h" 13 #include "util/cgroup.h" 14 #include "util/callchain.h" 15 #include "util/lock-contention.h" 16 #include "util/bpf_skel/lock_data.h" 17 18 #include <subcmd/pager.h> 19 #include <subcmd/parse-options.h> 20 #include "util/trace-event.h" 21 #include "util/tracepoint.h" 22 23 #include "util/debug.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/data.h" 27 #include "util/string2.h" 28 #include "util/map.h" 29 #include "util/util.h" 30 31 #include <stdio.h> 32 #include <sys/types.h> 33 #include <sys/prctl.h> 34 #include <semaphore.h> 35 #include <math.h> 36 #include <limits.h> 37 #include <ctype.h> 38 39 #include <linux/list.h> 40 #include <linux/hash.h> 41 #include <linux/kernel.h> 42 #include <linux/zalloc.h> 43 #include <linux/err.h> 44 #include <linux/stringify.h> 45 46 static struct perf_session *session; 47 static struct target target; 48 49 static struct rb_root thread_stats; 50 51 static bool combine_locks; 52 static bool show_thread_stats; 53 static bool show_lock_addrs; 54 static bool show_lock_owner; 55 static bool show_lock_cgroups; 56 static bool use_bpf; 57 static unsigned long bpf_map_entries = MAX_ENTRIES; 58 static int max_stack_depth = CONTENTION_STACK_DEPTH; 59 static int stack_skip = CONTENTION_STACK_SKIP; 60 static int print_nr_entries = INT_MAX / 2; 61 static const char *output_name = NULL; 62 static FILE *lock_output; 63 64 static struct lock_filter filters; 65 66 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; 67 68 static struct thread_stat *thread_stat_find(u32 tid) 69 { 70 struct rb_node *node; 71 struct thread_stat *st; 72 73 node = thread_stats.rb_node; 74 while (node) { 75 st = 
container_of(node, struct thread_stat, rb); 76 if (st->tid == tid) 77 return st; 78 else if (tid < st->tid) 79 node = node->rb_left; 80 else 81 node = node->rb_right; 82 } 83 84 return NULL; 85 } 86 87 static void thread_stat_insert(struct thread_stat *new) 88 { 89 struct rb_node **rb = &thread_stats.rb_node; 90 struct rb_node *parent = NULL; 91 struct thread_stat *p; 92 93 while (*rb) { 94 p = container_of(*rb, struct thread_stat, rb); 95 parent = *rb; 96 97 if (new->tid < p->tid) 98 rb = &(*rb)->rb_left; 99 else if (new->tid > p->tid) 100 rb = &(*rb)->rb_right; 101 else 102 BUG_ON("inserting invalid thread_stat\n"); 103 } 104 105 rb_link_node(&new->rb, parent, rb); 106 rb_insert_color(&new->rb, &thread_stats); 107 } 108 109 static struct thread_stat *thread_stat_findnew_after_first(u32 tid) 110 { 111 struct thread_stat *st; 112 113 st = thread_stat_find(tid); 114 if (st) 115 return st; 116 117 st = zalloc(sizeof(struct thread_stat)); 118 if (!st) { 119 pr_err("memory allocation failed\n"); 120 return NULL; 121 } 122 123 st->tid = tid; 124 INIT_LIST_HEAD(&st->seq_list); 125 126 thread_stat_insert(st); 127 128 return st; 129 } 130 131 static struct thread_stat *thread_stat_findnew_first(u32 tid); 132 static struct thread_stat *(*thread_stat_findnew)(u32 tid) = 133 thread_stat_findnew_first; 134 135 static struct thread_stat *thread_stat_findnew_first(u32 tid) 136 { 137 struct thread_stat *st; 138 139 st = zalloc(sizeof(struct thread_stat)); 140 if (!st) { 141 pr_err("memory allocation failed\n"); 142 return NULL; 143 } 144 st->tid = tid; 145 INIT_LIST_HEAD(&st->seq_list); 146 147 rb_link_node(&st->rb, NULL, &thread_stats.rb_node); 148 rb_insert_color(&st->rb, &thread_stats); 149 150 thread_stat_findnew = thread_stat_findnew_after_first; 151 return st; 152 } 153 154 /* build simple key function one is bigger than two */ 155 #define SINGLE_KEY(member) \ 156 static int lock_stat_key_ ## member(struct lock_stat *one, \ 157 struct lock_stat *two) \ 158 { \ 159 return 
one->member > two->member; \ 160 } 161 162 SINGLE_KEY(nr_acquired) 163 SINGLE_KEY(nr_contended) 164 SINGLE_KEY(avg_wait_time) 165 SINGLE_KEY(wait_time_total) 166 SINGLE_KEY(wait_time_max) 167 168 static int lock_stat_key_wait_time_min(struct lock_stat *one, 169 struct lock_stat *two) 170 { 171 u64 s1 = one->wait_time_min; 172 u64 s2 = two->wait_time_min; 173 if (s1 == ULLONG_MAX) 174 s1 = 0; 175 if (s2 == ULLONG_MAX) 176 s2 = 0; 177 return s1 > s2; 178 } 179 180 struct lock_key { 181 /* 182 * name: the value for specify by user 183 * this should be simpler than raw name of member 184 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total 185 */ 186 const char *name; 187 /* header: the string printed on the header line */ 188 const char *header; 189 /* len: the printing width of the field */ 190 int len; 191 /* key: a pointer to function to compare two lock stats for sorting */ 192 int (*key)(struct lock_stat*, struct lock_stat*); 193 /* print: a pointer to function to print a given lock stats */ 194 void (*print)(struct lock_key*, struct lock_stat*); 195 /* list: list entry to link this */ 196 struct list_head list; 197 }; 198 199 static void lock_stat_key_print_time(unsigned long long nsec, int len) 200 { 201 static const struct { 202 float base; 203 const char *unit; 204 } table[] = { 205 { 1e9 * 3600, "h " }, 206 { 1e9 * 60, "m " }, 207 { 1e9, "s " }, 208 { 1e6, "ms" }, 209 { 1e3, "us" }, 210 { 0, NULL }, 211 }; 212 213 /* for CSV output */ 214 if (len == 0) { 215 fprintf(lock_output, "%llu", nsec); 216 return; 217 } 218 219 for (int i = 0; table[i].unit; i++) { 220 if (nsec < table[i].base) 221 continue; 222 223 fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); 224 return; 225 } 226 227 fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns"); 228 } 229 230 #define PRINT_KEY(member) \ 231 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 232 struct lock_stat *ls) \ 233 { \ 234 fprintf(lock_output, "%*llu", 
key->len, (unsigned long long)ls->member);\ 235 } 236 237 #define PRINT_TIME(member) \ 238 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 239 struct lock_stat *ls) \ 240 { \ 241 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ 242 } 243 244 PRINT_KEY(nr_acquired) 245 PRINT_KEY(nr_contended) 246 PRINT_TIME(avg_wait_time) 247 PRINT_TIME(wait_time_total) 248 PRINT_TIME(wait_time_max) 249 250 static void lock_stat_key_print_wait_time_min(struct lock_key *key, 251 struct lock_stat *ls) 252 { 253 u64 wait_time = ls->wait_time_min; 254 255 if (wait_time == ULLONG_MAX) 256 wait_time = 0; 257 258 lock_stat_key_print_time(wait_time, key->len); 259 } 260 261 262 static const char *sort_key = "acquired"; 263 264 static int (*compare)(struct lock_stat *, struct lock_stat *); 265 266 static struct rb_root sorted; /* place to store intermediate data */ 267 static struct rb_root result; /* place to store sorted data */ 268 269 static LIST_HEAD(lock_keys); 270 static const char *output_fields; 271 272 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \ 273 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} } 274 static struct lock_key report_keys[] = { 275 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), 276 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 277 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 278 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 279 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 280 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 281 282 /* extra comparisons much complicated should be here */ 283 { } 284 }; 285 286 static struct lock_key contention_keys[] = { 287 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 288 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 289 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 290 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 291 DEF_KEY_LOCK(avg_wait, "avg 
wait", avg_wait_time, 12), 292 293 /* extra comparisons much complicated should be here */ 294 { } 295 }; 296 297 static int select_key(bool contention) 298 { 299 int i; 300 struct lock_key *keys = report_keys; 301 302 if (contention) 303 keys = contention_keys; 304 305 for (i = 0; keys[i].name; i++) { 306 if (!strcmp(keys[i].name, sort_key)) { 307 compare = keys[i].key; 308 309 /* selected key should be in the output fields */ 310 if (list_empty(&keys[i].list)) 311 list_add_tail(&keys[i].list, &lock_keys); 312 313 return 0; 314 } 315 } 316 317 pr_err("Unknown compare key: %s\n", sort_key); 318 return -1; 319 } 320 321 static int add_output_field(bool contention, char *name) 322 { 323 int i; 324 struct lock_key *keys = report_keys; 325 326 if (contention) 327 keys = contention_keys; 328 329 for (i = 0; keys[i].name; i++) { 330 if (strcmp(keys[i].name, name)) 331 continue; 332 333 /* prevent double link */ 334 if (list_empty(&keys[i].list)) 335 list_add_tail(&keys[i].list, &lock_keys); 336 337 return 0; 338 } 339 340 pr_err("Unknown output field: %s\n", name); 341 return -1; 342 } 343 344 static int setup_output_field(bool contention, const char *str) 345 { 346 char *tok, *tmp, *orig; 347 int i, ret = 0; 348 struct lock_key *keys = report_keys; 349 350 if (contention) 351 keys = contention_keys; 352 353 /* no output field given: use all of them */ 354 if (str == NULL) { 355 for (i = 0; keys[i].name; i++) 356 list_add_tail(&keys[i].list, &lock_keys); 357 return 0; 358 } 359 360 for (i = 0; keys[i].name; i++) 361 INIT_LIST_HEAD(&keys[i].list); 362 363 orig = tmp = strdup(str); 364 if (orig == NULL) 365 return -ENOMEM; 366 367 while ((tok = strsep(&tmp, ",")) != NULL){ 368 ret = add_output_field(contention, tok); 369 if (ret < 0) 370 break; 371 } 372 free(orig); 373 374 return ret; 375 } 376 377 static void combine_lock_stats(struct lock_stat *st) 378 { 379 struct rb_node **rb = &sorted.rb_node; 380 struct rb_node *parent = NULL; 381 struct lock_stat *p; 382 int ret; 
383 384 while (*rb) { 385 p = container_of(*rb, struct lock_stat, rb); 386 parent = *rb; 387 388 if (st->name && p->name) 389 ret = strcmp(st->name, p->name); 390 else 391 ret = !!st->name - !!p->name; 392 393 if (ret == 0) { 394 p->nr_acquired += st->nr_acquired; 395 p->nr_contended += st->nr_contended; 396 p->wait_time_total += st->wait_time_total; 397 398 if (p->nr_contended) 399 p->avg_wait_time = p->wait_time_total / p->nr_contended; 400 401 if (p->wait_time_min > st->wait_time_min) 402 p->wait_time_min = st->wait_time_min; 403 if (p->wait_time_max < st->wait_time_max) 404 p->wait_time_max = st->wait_time_max; 405 406 p->broken |= st->broken; 407 st->combined = 1; 408 return; 409 } 410 411 if (ret < 0) 412 rb = &(*rb)->rb_left; 413 else 414 rb = &(*rb)->rb_right; 415 } 416 417 rb_link_node(&st->rb, parent, rb); 418 rb_insert_color(&st->rb, &sorted); 419 } 420 421 static void insert_to(struct rb_root *rr, struct lock_stat *st, 422 int (*bigger)(struct lock_stat *, struct lock_stat *)) 423 { 424 struct rb_node **rb = &rr->rb_node; 425 struct rb_node *parent = NULL; 426 struct lock_stat *p; 427 428 while (*rb) { 429 p = container_of(*rb, struct lock_stat, rb); 430 parent = *rb; 431 432 if (bigger(st, p)) 433 rb = &(*rb)->rb_left; 434 else 435 rb = &(*rb)->rb_right; 436 } 437 438 rb_link_node(&st->rb, parent, rb); 439 rb_insert_color(&st->rb, rr); 440 } 441 442 static inline void insert_to_result(struct lock_stat *st, 443 int (*bigger)(struct lock_stat *, 444 struct lock_stat *)) 445 { 446 if (combine_locks && st->combined) 447 return; 448 insert_to(&result, st, bigger); 449 } 450 451 static inline struct lock_stat *pop_from(struct rb_root *rr) 452 { 453 struct rb_node *node = rr->rb_node; 454 455 if (!node) 456 return NULL; 457 458 while (node->rb_left) 459 node = node->rb_left; 460 461 rb_erase(node, rr); 462 return container_of(node, struct lock_stat, rb); 463 464 } 465 466 /* returns left most element of result, and erase it */ 467 static struct lock_stat 
*pop_from_result(void) 468 { 469 return pop_from(&result); 470 } 471 472 struct trace_lock_handler { 473 /* it's used on CONFIG_LOCKDEP */ 474 int (*acquire_event)(struct evsel *evsel, 475 struct perf_sample *sample); 476 477 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 478 int (*acquired_event)(struct evsel *evsel, 479 struct perf_sample *sample); 480 481 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 482 int (*contended_event)(struct evsel *evsel, 483 struct perf_sample *sample); 484 485 /* it's used on CONFIG_LOCKDEP */ 486 int (*release_event)(struct evsel *evsel, 487 struct perf_sample *sample); 488 489 /* it's used when CONFIG_LOCKDEP is off */ 490 int (*contention_begin_event)(struct evsel *evsel, 491 struct perf_sample *sample); 492 493 /* it's used when CONFIG_LOCKDEP is off */ 494 int (*contention_end_event)(struct evsel *evsel, 495 struct perf_sample *sample); 496 }; 497 498 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) 499 { 500 struct lock_seq_stat *seq; 501 502 list_for_each_entry(seq, &ts->seq_list, list) { 503 if (seq->addr == addr) 504 return seq; 505 } 506 507 seq = zalloc(sizeof(struct lock_seq_stat)); 508 if (!seq) { 509 pr_err("memory allocation failed\n"); 510 return NULL; 511 } 512 seq->state = SEQ_STATE_UNINITIALIZED; 513 seq->addr = addr; 514 515 list_add(&seq->list, &ts->seq_list); 516 return seq; 517 } 518 519 enum broken_state { 520 BROKEN_ACQUIRE, 521 BROKEN_ACQUIRED, 522 BROKEN_CONTENDED, 523 BROKEN_RELEASE, 524 BROKEN_MAX, 525 }; 526 527 static int bad_hist[BROKEN_MAX]; 528 529 enum acquire_flags { 530 TRY_LOCK = 1, 531 READ_LOCK = 2, 532 }; 533 534 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid) 535 { 536 switch (aggr_mode) { 537 case LOCK_AGGR_ADDR: 538 *key = addr; 539 break; 540 case LOCK_AGGR_TASK: 541 *key = tid; 542 break; 543 case LOCK_AGGR_CALLER: 544 case LOCK_AGGR_CGROUP: 545 default: 546 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 547 return -EINVAL; 548 
} 549 return 0; 550 } 551 552 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample); 553 554 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel, 555 struct perf_sample *sample) 556 { 557 if (aggr_mode == LOCK_AGGR_CALLER) { 558 *key = callchain_id(evsel, sample); 559 return 0; 560 } 561 return get_key_by_aggr_mode_simple(key, addr, sample->tid); 562 } 563 564 static int report_lock_acquire_event(struct evsel *evsel, 565 struct perf_sample *sample) 566 { 567 struct lock_stat *ls; 568 struct thread_stat *ts; 569 struct lock_seq_stat *seq; 570 const char *name = evsel__strval(evsel, sample, "name"); 571 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 572 int flag = evsel__intval(evsel, sample, "flags"); 573 u64 key; 574 int ret; 575 576 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 577 if (ret < 0) 578 return ret; 579 580 ls = lock_stat_findnew(key, name, 0); 581 if (!ls) 582 return -ENOMEM; 583 584 ts = thread_stat_findnew(sample->tid); 585 if (!ts) 586 return -ENOMEM; 587 588 seq = get_seq(ts, addr); 589 if (!seq) 590 return -ENOMEM; 591 592 switch (seq->state) { 593 case SEQ_STATE_UNINITIALIZED: 594 case SEQ_STATE_RELEASED: 595 if (!flag) { 596 seq->state = SEQ_STATE_ACQUIRING; 597 } else { 598 if (flag & TRY_LOCK) 599 ls->nr_trylock++; 600 if (flag & READ_LOCK) 601 ls->nr_readlock++; 602 seq->state = SEQ_STATE_READ_ACQUIRED; 603 seq->read_count = 1; 604 ls->nr_acquired++; 605 } 606 break; 607 case SEQ_STATE_READ_ACQUIRED: 608 if (flag & READ_LOCK) { 609 seq->read_count++; 610 ls->nr_acquired++; 611 goto end; 612 } else { 613 goto broken; 614 } 615 break; 616 case SEQ_STATE_ACQUIRED: 617 case SEQ_STATE_ACQUIRING: 618 case SEQ_STATE_CONTENDED: 619 broken: 620 /* broken lock sequence */ 621 if (!ls->broken) { 622 ls->broken = 1; 623 bad_hist[BROKEN_ACQUIRE]++; 624 } 625 list_del_init(&seq->list); 626 free(seq); 627 goto end; 628 default: 629 BUG_ON("Unknown state of lock sequence found!\n"); 630 break; 
631 } 632 633 ls->nr_acquire++; 634 seq->prev_event_time = sample->time; 635 end: 636 return 0; 637 } 638 639 static int report_lock_acquired_event(struct evsel *evsel, 640 struct perf_sample *sample) 641 { 642 struct lock_stat *ls; 643 struct thread_stat *ts; 644 struct lock_seq_stat *seq; 645 u64 contended_term; 646 const char *name = evsel__strval(evsel, sample, "name"); 647 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 648 u64 key; 649 int ret; 650 651 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 652 if (ret < 0) 653 return ret; 654 655 ls = lock_stat_findnew(key, name, 0); 656 if (!ls) 657 return -ENOMEM; 658 659 ts = thread_stat_findnew(sample->tid); 660 if (!ts) 661 return -ENOMEM; 662 663 seq = get_seq(ts, addr); 664 if (!seq) 665 return -ENOMEM; 666 667 switch (seq->state) { 668 case SEQ_STATE_UNINITIALIZED: 669 /* orphan event, do nothing */ 670 return 0; 671 case SEQ_STATE_ACQUIRING: 672 break; 673 case SEQ_STATE_CONTENDED: 674 contended_term = sample->time - seq->prev_event_time; 675 ls->wait_time_total += contended_term; 676 if (contended_term < ls->wait_time_min) 677 ls->wait_time_min = contended_term; 678 if (ls->wait_time_max < contended_term) 679 ls->wait_time_max = contended_term; 680 break; 681 case SEQ_STATE_RELEASED: 682 case SEQ_STATE_ACQUIRED: 683 case SEQ_STATE_READ_ACQUIRED: 684 /* broken lock sequence */ 685 if (!ls->broken) { 686 ls->broken = 1; 687 bad_hist[BROKEN_ACQUIRED]++; 688 } 689 list_del_init(&seq->list); 690 free(seq); 691 goto end; 692 default: 693 BUG_ON("Unknown state of lock sequence found!\n"); 694 break; 695 } 696 697 seq->state = SEQ_STATE_ACQUIRED; 698 ls->nr_acquired++; 699 ls->avg_wait_time = ls->nr_contended ? 
ls->wait_time_total/ls->nr_contended : 0; 700 seq->prev_event_time = sample->time; 701 end: 702 return 0; 703 } 704 705 static int report_lock_contended_event(struct evsel *evsel, 706 struct perf_sample *sample) 707 { 708 struct lock_stat *ls; 709 struct thread_stat *ts; 710 struct lock_seq_stat *seq; 711 const char *name = evsel__strval(evsel, sample, "name"); 712 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 713 u64 key; 714 int ret; 715 716 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 717 if (ret < 0) 718 return ret; 719 720 ls = lock_stat_findnew(key, name, 0); 721 if (!ls) 722 return -ENOMEM; 723 724 ts = thread_stat_findnew(sample->tid); 725 if (!ts) 726 return -ENOMEM; 727 728 seq = get_seq(ts, addr); 729 if (!seq) 730 return -ENOMEM; 731 732 switch (seq->state) { 733 case SEQ_STATE_UNINITIALIZED: 734 /* orphan event, do nothing */ 735 return 0; 736 case SEQ_STATE_ACQUIRING: 737 break; 738 case SEQ_STATE_RELEASED: 739 case SEQ_STATE_ACQUIRED: 740 case SEQ_STATE_READ_ACQUIRED: 741 case SEQ_STATE_CONTENDED: 742 /* broken lock sequence */ 743 if (!ls->broken) { 744 ls->broken = 1; 745 bad_hist[BROKEN_CONTENDED]++; 746 } 747 list_del_init(&seq->list); 748 free(seq); 749 goto end; 750 default: 751 BUG_ON("Unknown state of lock sequence found!\n"); 752 break; 753 } 754 755 seq->state = SEQ_STATE_CONTENDED; 756 ls->nr_contended++; 757 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; 758 seq->prev_event_time = sample->time; 759 end: 760 return 0; 761 } 762 763 static int report_lock_release_event(struct evsel *evsel, 764 struct perf_sample *sample) 765 { 766 struct lock_stat *ls; 767 struct thread_stat *ts; 768 struct lock_seq_stat *seq; 769 const char *name = evsel__strval(evsel, sample, "name"); 770 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 771 u64 key; 772 int ret; 773 774 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 775 if (ret < 0) 776 return ret; 777 778 ls = lock_stat_findnew(key, name, 0); 779 
if (!ls) 780 return -ENOMEM; 781 782 ts = thread_stat_findnew(sample->tid); 783 if (!ts) 784 return -ENOMEM; 785 786 seq = get_seq(ts, addr); 787 if (!seq) 788 return -ENOMEM; 789 790 switch (seq->state) { 791 case SEQ_STATE_UNINITIALIZED: 792 goto end; 793 case SEQ_STATE_ACQUIRED: 794 break; 795 case SEQ_STATE_READ_ACQUIRED: 796 seq->read_count--; 797 BUG_ON(seq->read_count < 0); 798 if (seq->read_count) { 799 ls->nr_release++; 800 goto end; 801 } 802 break; 803 case SEQ_STATE_ACQUIRING: 804 case SEQ_STATE_CONTENDED: 805 case SEQ_STATE_RELEASED: 806 /* broken lock sequence */ 807 if (!ls->broken) { 808 ls->broken = 1; 809 bad_hist[BROKEN_RELEASE]++; 810 } 811 goto free_seq; 812 default: 813 BUG_ON("Unknown state of lock sequence found!\n"); 814 break; 815 } 816 817 ls->nr_release++; 818 free_seq: 819 list_del_init(&seq->list); 820 free(seq); 821 end: 822 return 0; 823 } 824 825 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, 826 char *buf, int size) 827 { 828 u64 offset; 829 830 if (map == NULL || sym == NULL) { 831 buf[0] = '\0'; 832 return 0; 833 } 834 835 offset = map__map_ip(map, ip) - sym->start; 836 837 if (offset) 838 return scnprintf(buf, size, "%s+%#lx", sym->name, offset); 839 else 840 return strlcpy(buf, sym->name, size); 841 } 842 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, 843 char *buf, int size) 844 { 845 struct thread *thread; 846 struct callchain_cursor *cursor; 847 struct machine *machine = &session->machines.host; 848 struct symbol *sym; 849 int skip = 0; 850 int ret; 851 852 /* lock names will be replaced to task name later */ 853 if (show_thread_stats) 854 return -1; 855 856 thread = machine__findnew_thread(machine, -1, sample->pid); 857 if (thread == NULL) 858 return -1; 859 860 cursor = get_tls_callchain_cursor(); 861 862 /* use caller function name from the callchain */ 863 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 864 NULL, NULL, max_stack_depth); 
865 if (ret != 0) { 866 thread__put(thread); 867 return -1; 868 } 869 870 callchain_cursor_commit(cursor); 871 thread__put(thread); 872 873 while (true) { 874 struct callchain_cursor_node *node; 875 876 node = callchain_cursor_current(cursor); 877 if (node == NULL) 878 break; 879 880 /* skip first few entries - for lock functions */ 881 if (++skip <= stack_skip) 882 goto next; 883 884 sym = node->ms.sym; 885 if (sym && !machine__is_lock_function(machine, node->ip)) { 886 get_symbol_name_offset(node->ms.map, sym, node->ip, 887 buf, size); 888 return 0; 889 } 890 891 next: 892 callchain_cursor_advance(cursor); 893 } 894 return -1; 895 } 896 897 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) 898 { 899 struct callchain_cursor *cursor; 900 struct machine *machine = &session->machines.host; 901 struct thread *thread; 902 u64 hash = 0; 903 int skip = 0; 904 int ret; 905 906 thread = machine__findnew_thread(machine, -1, sample->pid); 907 if (thread == NULL) 908 return -1; 909 910 cursor = get_tls_callchain_cursor(); 911 /* use caller function name from the callchain */ 912 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 913 NULL, NULL, max_stack_depth); 914 thread__put(thread); 915 916 if (ret != 0) 917 return -1; 918 919 callchain_cursor_commit(cursor); 920 921 while (true) { 922 struct callchain_cursor_node *node; 923 924 node = callchain_cursor_current(cursor); 925 if (node == NULL) 926 break; 927 928 /* skip first few entries - for lock functions */ 929 if (++skip <= stack_skip) 930 goto next; 931 932 if (node->ms.sym && machine__is_lock_function(machine, node->ip)) 933 goto next; 934 935 hash ^= hash_long((unsigned long)node->ip, 64); 936 937 next: 938 callchain_cursor_advance(cursor); 939 } 940 return hash; 941 } 942 943 static u64 *get_callstack(struct perf_sample *sample, int max_stack) 944 { 945 u64 *callstack; 946 u64 i; 947 int c; 948 949 callstack = calloc(max_stack, sizeof(*callstack)); 950 if (callstack == NULL) 951 
return NULL; 952 953 for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) { 954 u64 ip = sample->callchain->ips[i]; 955 956 if (ip >= PERF_CONTEXT_MAX) 957 continue; 958 959 callstack[c++] = ip; 960 } 961 return callstack; 962 } 963 964 static int report_lock_contention_begin_event(struct evsel *evsel, 965 struct perf_sample *sample) 966 { 967 struct lock_stat *ls; 968 struct thread_stat *ts; 969 struct lock_seq_stat *seq; 970 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 971 unsigned int flags = evsel__intval(evsel, sample, "flags"); 972 u64 key; 973 int i, ret; 974 static bool kmap_loaded; 975 struct machine *machine = &session->machines.host; 976 struct map *kmap; 977 struct symbol *sym; 978 979 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 980 if (ret < 0) 981 return ret; 982 983 if (!kmap_loaded) { 984 unsigned long *addrs; 985 986 /* make sure it loads the kernel map to find lock symbols */ 987 map__load(machine__kernel_map(machine)); 988 kmap_loaded = true; 989 990 /* convert (kernel) symbols to addresses */ 991 for (i = 0; i < filters.nr_syms; i++) { 992 sym = machine__find_kernel_symbol_by_name(machine, 993 filters.syms[i], 994 &kmap); 995 if (sym == NULL) { 996 pr_warning("ignore unknown symbol: %s\n", 997 filters.syms[i]); 998 continue; 999 } 1000 1001 addrs = realloc(filters.addrs, 1002 (filters.nr_addrs + 1) * sizeof(*addrs)); 1003 if (addrs == NULL) { 1004 pr_warning("memory allocation failure\n"); 1005 return -ENOMEM; 1006 } 1007 1008 addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start); 1009 filters.addrs = addrs; 1010 } 1011 } 1012 1013 ls = lock_stat_find(key); 1014 if (!ls) { 1015 char buf[128]; 1016 const char *name = ""; 1017 1018 switch (aggr_mode) { 1019 case LOCK_AGGR_ADDR: 1020 sym = machine__find_kernel_symbol(machine, key, &kmap); 1021 if (sym) 1022 name = sym->name; 1023 break; 1024 case LOCK_AGGR_CALLER: 1025 name = buf; 1026 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1027 
name = "Unknown"; 1028 break; 1029 case LOCK_AGGR_CGROUP: 1030 case LOCK_AGGR_TASK: 1031 default: 1032 break; 1033 } 1034 1035 ls = lock_stat_findnew(key, name, flags); 1036 if (!ls) 1037 return -ENOMEM; 1038 } 1039 1040 if (filters.nr_types) { 1041 bool found = false; 1042 1043 for (i = 0; i < filters.nr_types; i++) { 1044 if (flags == filters.types[i]) { 1045 found = true; 1046 break; 1047 } 1048 } 1049 1050 if (!found) 1051 return 0; 1052 } 1053 1054 if (filters.nr_addrs) { 1055 bool found = false; 1056 1057 for (i = 0; i < filters.nr_addrs; i++) { 1058 if (addr == filters.addrs[i]) { 1059 found = true; 1060 break; 1061 } 1062 } 1063 1064 if (!found) 1065 return 0; 1066 } 1067 1068 if (needs_callstack()) { 1069 u64 *callstack = get_callstack(sample, max_stack_depth); 1070 if (callstack == NULL) 1071 return -ENOMEM; 1072 1073 if (!match_callstack_filter(machine, callstack, max_stack_depth)) { 1074 free(callstack); 1075 return 0; 1076 } 1077 1078 if (ls->callstack == NULL) 1079 ls->callstack = callstack; 1080 else 1081 free(callstack); 1082 } 1083 1084 ts = thread_stat_findnew(sample->tid); 1085 if (!ts) 1086 return -ENOMEM; 1087 1088 seq = get_seq(ts, addr); 1089 if (!seq) 1090 return -ENOMEM; 1091 1092 switch (seq->state) { 1093 case SEQ_STATE_UNINITIALIZED: 1094 case SEQ_STATE_ACQUIRED: 1095 break; 1096 case SEQ_STATE_CONTENDED: 1097 /* 1098 * It can have nested contention begin with mutex spinning, 1099 * then we would use the original contention begin event and 1100 * ignore the second one. 
1101 */ 1102 goto end; 1103 case SEQ_STATE_ACQUIRING: 1104 case SEQ_STATE_READ_ACQUIRED: 1105 case SEQ_STATE_RELEASED: 1106 /* broken lock sequence */ 1107 if (!ls->broken) { 1108 ls->broken = 1; 1109 bad_hist[BROKEN_CONTENDED]++; 1110 } 1111 list_del_init(&seq->list); 1112 free(seq); 1113 goto end; 1114 default: 1115 BUG_ON("Unknown state of lock sequence found!\n"); 1116 break; 1117 } 1118 1119 if (seq->state != SEQ_STATE_CONTENDED) { 1120 seq->state = SEQ_STATE_CONTENDED; 1121 seq->prev_event_time = sample->time; 1122 ls->nr_contended++; 1123 } 1124 end: 1125 return 0; 1126 } 1127 1128 static int report_lock_contention_end_event(struct evsel *evsel, 1129 struct perf_sample *sample) 1130 { 1131 struct lock_stat *ls; 1132 struct thread_stat *ts; 1133 struct lock_seq_stat *seq; 1134 u64 contended_term; 1135 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1136 u64 key; 1137 int ret; 1138 1139 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 1140 if (ret < 0) 1141 return ret; 1142 1143 ls = lock_stat_find(key); 1144 if (!ls) 1145 return 0; 1146 1147 ts = thread_stat_find(sample->tid); 1148 if (!ts) 1149 return 0; 1150 1151 seq = get_seq(ts, addr); 1152 if (!seq) 1153 return -ENOMEM; 1154 1155 switch (seq->state) { 1156 case SEQ_STATE_UNINITIALIZED: 1157 goto end; 1158 case SEQ_STATE_CONTENDED: 1159 contended_term = sample->time - seq->prev_event_time; 1160 ls->wait_time_total += contended_term; 1161 if (contended_term < ls->wait_time_min) 1162 ls->wait_time_min = contended_term; 1163 if (ls->wait_time_max < contended_term) 1164 ls->wait_time_max = contended_term; 1165 break; 1166 case SEQ_STATE_ACQUIRING: 1167 case SEQ_STATE_ACQUIRED: 1168 case SEQ_STATE_READ_ACQUIRED: 1169 case SEQ_STATE_RELEASED: 1170 /* broken lock sequence */ 1171 if (!ls->broken) { 1172 ls->broken = 1; 1173 bad_hist[BROKEN_ACQUIRED]++; 1174 } 1175 list_del_init(&seq->list); 1176 free(seq); 1177 goto end; 1178 default: 1179 BUG_ON("Unknown state of lock sequence found!\n"); 1180 
break; 1181 } 1182 1183 seq->state = SEQ_STATE_ACQUIRED; 1184 ls->nr_acquired++; 1185 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; 1186 end: 1187 return 0; 1188 } 1189 1190 /* lock oriented handlers */ 1191 /* TODO: handlers for CPU oriented, thread oriented */ 1192 static struct trace_lock_handler report_lock_ops = { 1193 .acquire_event = report_lock_acquire_event, 1194 .acquired_event = report_lock_acquired_event, 1195 .contended_event = report_lock_contended_event, 1196 .release_event = report_lock_release_event, 1197 .contention_begin_event = report_lock_contention_begin_event, 1198 .contention_end_event = report_lock_contention_end_event, 1199 }; 1200 1201 static struct trace_lock_handler contention_lock_ops = { 1202 .contention_begin_event = report_lock_contention_begin_event, 1203 .contention_end_event = report_lock_contention_end_event, 1204 }; 1205 1206 1207 static struct trace_lock_handler *trace_handler; 1208 1209 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) 1210 { 1211 if (trace_handler->acquire_event) 1212 return trace_handler->acquire_event(evsel, sample); 1213 return 0; 1214 } 1215 1216 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) 1217 { 1218 if (trace_handler->acquired_event) 1219 return trace_handler->acquired_event(evsel, sample); 1220 return 0; 1221 } 1222 1223 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) 1224 { 1225 if (trace_handler->contended_event) 1226 return trace_handler->contended_event(evsel, sample); 1227 return 0; 1228 } 1229 1230 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) 1231 { 1232 if (trace_handler->release_event) 1233 return trace_handler->release_event(evsel, sample); 1234 return 0; 1235 } 1236 1237 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) 1238 { 1239 if (trace_handler->contention_begin_event) 
1240 return trace_handler->contention_begin_event(evsel, sample); 1241 return 0; 1242 } 1243 1244 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) 1245 { 1246 if (trace_handler->contention_end_event) 1247 return trace_handler->contention_end_event(evsel, sample); 1248 return 0; 1249 } 1250 1251 static void print_bad_events(int bad, int total) 1252 { 1253 /* Output for debug, this have to be removed */ 1254 int i; 1255 int broken = 0; 1256 const char *name[4] = 1257 { "acquire", "acquired", "contended", "release" }; 1258 1259 for (i = 0; i < BROKEN_MAX; i++) 1260 broken += bad_hist[i]; 1261 1262 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1263 return; 1264 1265 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1266 fprintf(lock_output, "bad: %d, total: %d\n", bad, total); 1267 fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100); 1268 fprintf(lock_output, "histogram of events caused bad sequence\n"); 1269 for (i = 0; i < BROKEN_MAX; i++) 1270 fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]); 1271 } 1272 1273 /* TODO: various way to print, coloring, nano or milli sec */ 1274 static void print_result(void) 1275 { 1276 struct lock_stat *st; 1277 struct lock_key *key; 1278 char cut_name[20]; 1279 int bad, total, printed; 1280 1281 if (!quiet) { 1282 fprintf(lock_output, "%20s ", "Name"); 1283 list_for_each_entry(key, &lock_keys, list) 1284 fprintf(lock_output, "%*s ", key->len, key->header); 1285 fprintf(lock_output, "\n\n"); 1286 } 1287 1288 bad = total = printed = 0; 1289 while ((st = pop_from_result())) { 1290 total++; 1291 if (st->broken) 1292 bad++; 1293 if (!st->nr_acquired) 1294 continue; 1295 1296 bzero(cut_name, 20); 1297 1298 if (strlen(st->name) < 20) { 1299 /* output raw name */ 1300 const char *name = st->name; 1301 1302 if (show_thread_stats) { 1303 struct thread *t; 1304 1305 /* st->addr contains tid of thread */ 1306 t = perf_session__findnew(session, 
st->addr); 1307 name = thread__comm_str(t); 1308 } 1309 1310 fprintf(lock_output, "%20s ", name); 1311 } else { 1312 strncpy(cut_name, st->name, 16); 1313 cut_name[16] = '.'; 1314 cut_name[17] = '.'; 1315 cut_name[18] = '.'; 1316 cut_name[19] = '\0'; 1317 /* cut off name for saving output style */ 1318 fprintf(lock_output, "%20s ", cut_name); 1319 } 1320 1321 list_for_each_entry(key, &lock_keys, list) { 1322 key->print(key, st); 1323 fprintf(lock_output, " "); 1324 } 1325 fprintf(lock_output, "\n"); 1326 1327 if (++printed >= print_nr_entries) 1328 break; 1329 } 1330 1331 print_bad_events(bad, total); 1332 } 1333 1334 static bool info_threads, info_map; 1335 1336 static void dump_threads(void) 1337 { 1338 struct thread_stat *st; 1339 struct rb_node *node; 1340 struct thread *t; 1341 1342 fprintf(lock_output, "%10s: comm\n", "Thread ID"); 1343 1344 node = rb_first(&thread_stats); 1345 while (node) { 1346 st = container_of(node, struct thread_stat, rb); 1347 t = perf_session__findnew(session, st->tid); 1348 fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t)); 1349 node = rb_next(node); 1350 thread__put(t); 1351 } 1352 } 1353 1354 static int compare_maps(struct lock_stat *a, struct lock_stat *b) 1355 { 1356 int ret; 1357 1358 if (a->name && b->name) 1359 ret = strcmp(a->name, b->name); 1360 else 1361 ret = !!a->name - !!b->name; 1362 1363 if (!ret) 1364 return a->addr < b->addr; 1365 else 1366 return ret < 0; 1367 } 1368 1369 static void dump_map(void) 1370 { 1371 unsigned int i; 1372 struct lock_stat *st; 1373 1374 fprintf(lock_output, "Address of instance: name of class\n"); 1375 for (i = 0; i < LOCKHASH_SIZE; i++) { 1376 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1377 insert_to_result(st, compare_maps); 1378 } 1379 } 1380 1381 while ((st = pop_from_result())) 1382 fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name); 1383 } 1384 1385 static void dump_info(void) 1386 { 1387 if (info_threads) 1388 dump_threads(); 

	if (info_map) {
		if (info_threads)
			fputc('\n', lock_output);
		dump_map();
	}
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};

/*
 * Re-attach the tracepoint handlers whenever an event update arrives
 * (needed for pipe mode where evsels show up after session creation).
 */
static int process_event_update(const struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

/* Dispatch one sample to the evsel's registered tracepoint handler. */
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

/* Merge stats of locks in the same class when --combine-locks is given. */
static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

/* Move every hashed lock_stat into the sorted result tree. */
static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const struct {
	unsigned int flags;
	/*
	 * Name of the lock flags (access), with delimiter ':'.
	 * For example, rwsem:R or rwsem:W.
	 */
	const char *flags_name;
	/* Name of the lock (type), for example, rwlock or rwsem. */
	const char *lock_name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
};

/* Map the type bits of 'flags' to a flags name like "rwsem:R". */
static const char *get_type_flags_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].flags_name;
	}
	return
		"unknown";
}

/* Map the type bits of 'flags' to a lock name like "rwsem". */
static const char *get_type_lock_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].lock_name;
	}
	return "unknown";
}

/* Release all memory held by the -Y/-L/-G and slab filter lists. */
static void lock_filter_finish(void)
{
	zfree(&filters.types);
	filters.nr_types = 0;

	zfree(&filters.addrs);
	filters.nr_addrs = 0;

	for (int i = 0; i < filters.nr_syms; i++)
		free(filters.syms[i]);

	zfree(&filters.syms);
	filters.nr_syms = 0;

	zfree(&filters.cgrps);
	filters.nr_cgrps = 0;

	for (int i = 0; i < filters.nr_slabs; i++)
		free(filters.slabs[i]);

	zfree(&filters.slabs);
	filters.nr_slabs = 0;
}

static void sort_contention_result(void)
{
	sort_result();
}

/* Column header line for human-readable (non-CSV) contention output. */
static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, " %10s %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n\n", "cgroup");
		break;
	default:
		break;
	}
}

/* Column header line for CSV output ('-x <sep>'). */
static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ?
"owner" : "comm"); 1597 break; 1598 case LOCK_AGGR_CALLER: 1599 fprintf(lock_output, "%s%s %s", "type", sep, "caller"); 1600 if (verbose > 0) 1601 fprintf(lock_output, "%s %s", sep, "stacktrace"); 1602 fprintf(lock_output, "\n"); 1603 break; 1604 case LOCK_AGGR_ADDR: 1605 fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type"); 1606 break; 1607 case LOCK_AGGR_CGROUP: 1608 fprintf(lock_output, "%s\n", "cgroup"); 1609 break; 1610 default: 1611 break; 1612 } 1613 } 1614 1615 static void print_header(void) 1616 { 1617 if (!quiet) { 1618 if (symbol_conf.field_sep) 1619 print_header_csv(symbol_conf.field_sep); 1620 else 1621 print_header_stdio(); 1622 } 1623 } 1624 1625 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st) 1626 { 1627 struct lock_key *key; 1628 struct thread *t; 1629 int pid; 1630 1631 list_for_each_entry(key, &lock_keys, list) { 1632 key->print(key, st); 1633 fprintf(lock_output, " "); 1634 } 1635 1636 switch (aggr_mode) { 1637 case LOCK_AGGR_CALLER: 1638 fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name); 1639 break; 1640 case LOCK_AGGR_TASK: 1641 pid = st->addr; 1642 t = perf_session__findnew(session, pid); 1643 fprintf(lock_output, " %10d %s\n", 1644 pid, pid == -1 ? 
"Unknown" : thread__comm_str(t)); 1645 break; 1646 case LOCK_AGGR_ADDR: 1647 fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr, 1648 st->name, get_type_lock_name(st->flags)); 1649 break; 1650 case LOCK_AGGR_CGROUP: 1651 fprintf(lock_output, " %s\n", st->name); 1652 break; 1653 default: 1654 break; 1655 } 1656 1657 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1658 struct map *kmap; 1659 struct symbol *sym; 1660 char buf[128]; 1661 u64 ip; 1662 1663 for (int i = 0; i < max_stack_depth; i++) { 1664 if (!st->callstack || !st->callstack[i]) 1665 break; 1666 1667 ip = st->callstack[i]; 1668 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1669 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1670 fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf); 1671 } 1672 } 1673 } 1674 1675 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st, 1676 const char *sep) 1677 { 1678 struct lock_key *key; 1679 struct thread *t; 1680 int pid; 1681 1682 list_for_each_entry(key, &lock_keys, list) { 1683 key->print(key, st); 1684 fprintf(lock_output, "%s ", sep); 1685 } 1686 1687 switch (aggr_mode) { 1688 case LOCK_AGGR_CALLER: 1689 fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name); 1690 if (verbose <= 0) 1691 fprintf(lock_output, "\n"); 1692 break; 1693 case LOCK_AGGR_TASK: 1694 pid = st->addr; 1695 t = perf_session__findnew(session, pid); 1696 fprintf(lock_output, "%d%s %s\n", pid, sep, 1697 pid == -1 ? 
"Unknown" : thread__comm_str(t)); 1698 break; 1699 case LOCK_AGGR_ADDR: 1700 fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep, 1701 st->name, sep, get_type_lock_name(st->flags)); 1702 break; 1703 case LOCK_AGGR_CGROUP: 1704 fprintf(lock_output, "%s\n",st->name); 1705 break; 1706 default: 1707 break; 1708 } 1709 1710 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1711 struct map *kmap; 1712 struct symbol *sym; 1713 char buf[128]; 1714 u64 ip; 1715 1716 for (int i = 0; i < max_stack_depth; i++) { 1717 if (!st->callstack || !st->callstack[i]) 1718 break; 1719 1720 ip = st->callstack[i]; 1721 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1722 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1723 fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf); 1724 } 1725 fprintf(lock_output, "\n"); 1726 } 1727 } 1728 1729 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st) 1730 { 1731 if (symbol_conf.field_sep) 1732 print_lock_stat_csv(con, st, symbol_conf.field_sep); 1733 else 1734 print_lock_stat_stdio(con, st); 1735 } 1736 1737 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails) 1738 { 1739 /* Output for debug, this have to be removed */ 1740 int broken = fails->task + fails->stack + fails->time + fails->data; 1741 1742 if (!use_bpf) 1743 print_bad_events(bad, total); 1744 1745 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1746 return; 1747 1748 total += broken; 1749 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1750 fprintf(lock_output, "bad: %d, total: %d\n", broken, total); 1751 fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total); 1752 1753 fprintf(lock_output, "histogram of failure reasons\n"); 1754 fprintf(lock_output, " %10s: %d\n", "task", fails->task); 1755 fprintf(lock_output, " %10s: %d\n", "stack", fails->stack); 1756 fprintf(lock_output, " %10s: %d\n", "time", fails->time); 1757 
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

/* Debug footer for CSV output; 'bad' is recomputed from BPF fail counters. */
static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
			     const char *sep)
{
	/* Output for debug, this have to be removed */
	if (use_bpf)
		bad = fails->task + fails->stack + fails->time + fails->data;

	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
		return;

	total += bad;
	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

	if (use_bpf) {
		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
	} else {
		int i;
		const char *name[4] = { "acquire", "acquired", "contended", "release" };

		for (i = 0; i < BROKEN_MAX; i++)
			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
	}
	fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
	if (symbol_conf.field_sep)
		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
	else
		print_footer_stdio(total, bad, fails);
}

/* Print the sorted contention results, owner stacks, and debug footer. */
static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		total += use_bpf ?
			st->nr_contended : 1;
		if (st->broken)
			bad++;

		/* entries that never waited carry no useful contention data */
		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (con->owner && con->save_callstack && verbose > 0) {
		struct rb_root root = RB_ROOT;

		if (symbol_conf.field_sep)
			fprintf(lock_output, "# owner stack trace:\n");
		else
			fprintf(lock_output, "\n=== owner stack trace ===\n\n");
		while ((st = pop_owner_stack_trace(con)))
			insert_to(&root, st, compare);

		while ((st = pop_from(&root))) {
			print_lock_stat(con, st);
			free(st);
		}
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}

static bool force;

/* 'perf lock report' / 'perf lock info' main path over a perf.data file. */
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr		= perf_event__process_attr;
	eops.event_update	= process_event_update;
	eops.sample		= process_sample_event;
	eops.comm		= perf_event__process_comm;
	eops.mmap		= perf_event__process_mmap;
	eops.namespaces		= perf_event__process_namespaces;
	eops.tracing_data	= perf_event__process_tracing_data;
	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session,
					      "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

/* Empty handler: signals only need to interrupt the pause() in BPF mode. */
static void sighandler(int sig __maybe_unused)
{
}

/*
 * Reject mutually exclusive 'perf lock contention' option combinations and
 * separators that would collide with characters in the output itself.
 */
static int check_lock_contention_options(const struct option *options,
					 const char * const *usage)

{
	if (show_thread_stats && show_lock_addrs) {
		pr_err("Cannot use thread and addr mode together\n");
		parse_options_usage(usage, options, "threads", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_owner && !use_bpf) {
		pr_err("Lock owners are available only with BPF\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_owner && show_lock_addrs) {
		pr_err("Cannot use owner and addr mode together\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && !use_bpf) {
		pr_err("Cgroups are available only with BPF\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_cgroups && show_lock_addrs) {
		pr_err("Cannot use cgroup and addr mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && show_thread_stats) {
		pr_err("Cannot use cgroup and thread mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "threads", 0);
		return -1;
	}

	if (symbol_conf.field_sep) {
		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
			pr_err("Cannot use the separator that is already used\n");
			parse_options_usage(usage, options, "x", 1);
			return -1;
		}
	}

	if (show_lock_owner && !show_thread_stats) {
		pr_warning("Now -o try to show owner's callstack instead of pid and comm.\n");
		pr_warning("Please use -t option too to keep the old behavior.\n");
	}

	return 0;
}

/*
 * 'perf lock contention' main path.  Either attaches BPF programs and runs
 * the workload live (use_bpf), or replays contention tracepoints recorded
 * in a perf.data file.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
		.cgroups = RB_ROOT,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr		= perf_event__process_attr;
	eops.event_update	= process_event_update;
	eops.sample		= process_sample_event;
	eops.comm		= perf_event__process_comm;
	eops.mmap		= perf_event__process_mmap;
	eops.tracing_data	= perf_event__process_tracing_data;

	/* in BPF mode there is no data file to read */
	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR :
		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;

	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* let pause() below return when interrupted or workload exits */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		err = lock_contention_prepare(&con);
		if (err < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
							   contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	err = setup_output_field(true, output_fields);
	if (err) {
		pr_err("Failed to setup output field\n");
		goto out_delete;
	}

	err = select_key(true);
	if (err)
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}


/*
 * 'perf lock record': build an argv for 'perf record' with the lockdep
 * tracepoints if available, otherwise the contention tracepoints plus
 * callgraph options, then forward the user's remaining arguments.
 */
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			?
lock_tracepoints[j].name 2205 : contention_tracepoints[j].name; 2206 } 2207 2208 for (j = 0; j < nr_callgraph_args; j++, i++) 2209 rec_argv[i] = callgraph_args[j]; 2210 2211 for (j = 1; j < (unsigned int)argc; j++, i++) 2212 rec_argv[i] = argv[j]; 2213 2214 BUG_ON(i != rec_argc); 2215 2216 ret = cmd_record(i, rec_argv); 2217 free(rec_argv); 2218 return ret; 2219 } 2220 2221 static int parse_map_entry(const struct option *opt, const char *str, 2222 int unset __maybe_unused) 2223 { 2224 unsigned long *len = (unsigned long *)opt->value; 2225 unsigned long val; 2226 char *endptr; 2227 2228 errno = 0; 2229 val = strtoul(str, &endptr, 0); 2230 if (*endptr != '\0' || errno != 0) { 2231 pr_err("invalid BPF map length: %s\n", str); 2232 return -1; 2233 } 2234 2235 *len = val; 2236 return 0; 2237 } 2238 2239 static int parse_max_stack(const struct option *opt, const char *str, 2240 int unset __maybe_unused) 2241 { 2242 unsigned long *len = (unsigned long *)opt->value; 2243 long val; 2244 char *endptr; 2245 2246 errno = 0; 2247 val = strtol(str, &endptr, 0); 2248 if (*endptr != '\0' || errno != 0) { 2249 pr_err("invalid max stack depth: %s\n", str); 2250 return -1; 2251 } 2252 2253 if (val < 0 || val > sysctl__max_stack()) { 2254 pr_err("invalid max stack depth: %ld\n", val); 2255 return -1; 2256 } 2257 2258 *len = val; 2259 return 0; 2260 } 2261 2262 static bool add_lock_type(unsigned int flags) 2263 { 2264 unsigned int *tmp; 2265 2266 tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types)); 2267 if (tmp == NULL) 2268 return false; 2269 2270 tmp[filters.nr_types++] = flags; 2271 filters.types = tmp; 2272 return true; 2273 } 2274 2275 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str, 2276 int unset __maybe_unused) 2277 { 2278 char *s, *tmp, *tok; 2279 2280 s = strdup(str); 2281 if (s == NULL) 2282 return -1; 2283 2284 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2285 bool found = false; 

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * Single lock name could contain multiple flags.
		 * Replace alias `pcpu-sem` with actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}

	}

	free(s);
	return 0;
}

/* Append one raw address to the -L filter list. */
static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

/* Append one symbol name (duplicated) to the -L filter list. */
static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

static bool
add_lock_slab(char *name) 2374 { 2375 char **tmp; 2376 char *sym = strdup(name); 2377 2378 if (sym == NULL) { 2379 pr_err("Memory allocation failure\n"); 2380 return false; 2381 } 2382 2383 tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs)); 2384 if (tmp == NULL) { 2385 pr_err("Memory allocation failure\n"); 2386 return false; 2387 } 2388 2389 tmp[filters.nr_slabs++] = sym; 2390 filters.slabs = tmp; 2391 return true; 2392 } 2393 2394 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str, 2395 int unset __maybe_unused) 2396 { 2397 char *s, *tmp, *tok; 2398 int ret = 0; 2399 u64 addr; 2400 2401 s = strdup(str); 2402 if (s == NULL) 2403 return -1; 2404 2405 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2406 char *end; 2407 2408 addr = strtoul(tok, &end, 16); 2409 if (*end == '\0') { 2410 if (!add_lock_addr(addr)) { 2411 ret = -1; 2412 break; 2413 } 2414 continue; 2415 } 2416 2417 if (*tok == '&') { 2418 if (!add_lock_slab(tok + 1)) { 2419 ret = -1; 2420 break; 2421 } 2422 continue; 2423 } 2424 2425 /* 2426 * At this moment, we don't have kernel symbols. Save the symbols 2427 * in a separate list and resolve them to addresses later. 
2428 */ 2429 if (!add_lock_sym(tok)) { 2430 ret = -1; 2431 break; 2432 } 2433 } 2434 2435 free(s); 2436 return ret; 2437 } 2438 2439 static int parse_output(const struct option *opt __maybe_unused, const char *str, 2440 int unset __maybe_unused) 2441 { 2442 const char **name = (const char **)opt->value; 2443 2444 if (str == NULL) 2445 return -1; 2446 2447 lock_output = fopen(str, "w"); 2448 if (lock_output == NULL) { 2449 pr_err("Cannot open %s\n", str); 2450 return -1; 2451 } 2452 2453 *name = str; 2454 return 0; 2455 } 2456 2457 static bool add_lock_cgroup(char *name) 2458 { 2459 u64 *tmp; 2460 struct cgroup *cgrp; 2461 2462 cgrp = cgroup__new(name, /*do_open=*/false); 2463 if (cgrp == NULL) { 2464 pr_err("Failed to create cgroup: %s\n", name); 2465 return false; 2466 } 2467 2468 if (read_cgroup_id(cgrp) < 0) { 2469 pr_err("Failed to read cgroup id for %s\n", name); 2470 cgroup__put(cgrp); 2471 return false; 2472 } 2473 2474 tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps)); 2475 if (tmp == NULL) { 2476 pr_err("Memory allocation failure\n"); 2477 return false; 2478 } 2479 2480 tmp[filters.nr_cgrps++] = cgrp->id; 2481 filters.cgrps = tmp; 2482 cgroup__put(cgrp); 2483 return true; 2484 } 2485 2486 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str, 2487 int unset __maybe_unused) 2488 { 2489 char *s, *tmp, *tok; 2490 int ret = 0; 2491 2492 s = strdup(str); 2493 if (s == NULL) 2494 return -1; 2495 2496 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2497 if (!add_lock_cgroup(tok)) { 2498 ret = -1; 2499 break; 2500 } 2501 } 2502 2503 free(s); 2504 return ret; 2505 } 2506 2507 int cmd_lock(int argc, const char **argv) 2508 { 2509 const struct option lock_options[] = { 2510 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2511 OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output), 2512 OPT_INCR('v', "verbose", &verbose, "be more 
verbose (show symbol address, etc)"), 2513 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 2514 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 2515 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2516 "file", "vmlinux pathname"), 2517 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 2518 "file", "kallsyms pathname"), 2519 OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), 2520 OPT_END() 2521 }; 2522 2523 const struct option info_options[] = { 2524 OPT_BOOLEAN('t', "threads", &info_threads, 2525 "dump the thread list in perf.data"), 2526 OPT_BOOLEAN('m', "map", &info_map, 2527 "dump the map of lock instances (address:name table)"), 2528 OPT_PARENT(lock_options) 2529 }; 2530 2531 const struct option report_options[] = { 2532 OPT_STRING('k', "key", &sort_key, "acquired", 2533 "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2534 OPT_STRING('F', "field", &output_fields, NULL, 2535 "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2536 /* TODO: type */ 2537 OPT_BOOLEAN('c', "combine-locks", &combine_locks, 2538 "combine locks in the same class"), 2539 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2540 "show per-thread lock stats"), 2541 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2542 OPT_PARENT(lock_options) 2543 }; 2544 2545 struct option contention_options[] = { 2546 OPT_STRING('k', "key", &sort_key, "wait_total", 2547 "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"), 2548 OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait", 2549 "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"), 2550 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2551 "show per-thread lock stats"), 2552 OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"), 2553 OPT_BOOLEAN('a', "all-cpus", 
&target.system_wide, 2554 "System-wide collection from all CPUs"), 2555 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 2556 "List of cpus to monitor"), 2557 OPT_STRING('p', "pid", &target.pid, "pid", 2558 "Trace on existing process id"), 2559 OPT_STRING(0, "tid", &target.tid, "tid", 2560 "Trace on existing thread id (exclusive to --pid)"), 2561 OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num", 2562 "Max number of BPF map entries", parse_map_entry), 2563 OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", 2564 "Set the maximum stack depth when collecting lock contention, " 2565 "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), 2566 OPT_INTEGER(0, "stack-skip", &stack_skip, 2567 "Set the number of stack depth to skip when finding a lock caller, " 2568 "Default: " __stringify(CONTENTION_STACK_SKIP)), 2569 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2570 OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"), 2571 OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS", 2572 "Filter specific type of locks", parse_lock_type), 2573 OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES", 2574 "Filter specific address/symbol of locks", parse_lock_addr), 2575 OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", 2576 "Filter specific function in the callstack", parse_call_stack), 2577 OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"), 2578 OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", 2579 "print result in CSV format with custom separator"), 2580 OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"), 2581 OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS", 2582 "Filter specific cgroups", parse_cgroup_filter), 2583 OPT_PARENT(lock_options) 2584 }; 2585 2586 const char * const info_usage[] = { 2587 "perf lock info [<options>]", 2588 NULL 2589 }; 2590 const char *const lock_subcommands[] = { "record", 
"report", "script", 2591 "info", "contention", NULL }; 2592 const char *lock_usage[] = { 2593 NULL, 2594 NULL 2595 }; 2596 const char * const report_usage[] = { 2597 "perf lock report [<options>]", 2598 NULL 2599 }; 2600 const char * const contention_usage[] = { 2601 "perf lock contention [<options>]", 2602 NULL 2603 }; 2604 unsigned int i; 2605 int rc = 0; 2606 2607 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table)); 2608 if (!lockhash_table) 2609 return -ENOMEM; 2610 2611 for (i = 0; i < LOCKHASH_SIZE; i++) 2612 INIT_HLIST_HEAD(lockhash_table + i); 2613 2614 lock_output = stderr; 2615 argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, 2616 lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2617 if (!argc) 2618 usage_with_options(lock_usage, lock_options); 2619 2620 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 2621 return __cmd_record(argc, argv); 2622 } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 2623 trace_handler = &report_lock_ops; 2624 if (argc) { 2625 argc = parse_options(argc, argv, 2626 report_options, report_usage, 0); 2627 if (argc) 2628 usage_with_options(report_usage, report_options); 2629 } 2630 rc = __cmd_report(false); 2631 } else if (!strcmp(argv[0], "script")) { 2632 /* Aliased to 'perf script' */ 2633 rc = cmd_script(argc, argv); 2634 } else if (!strcmp(argv[0], "info")) { 2635 if (argc) { 2636 argc = parse_options(argc, argv, 2637 info_options, info_usage, 0); 2638 if (argc) 2639 usage_with_options(info_usage, info_options); 2640 } 2641 2642 /* If neither threads nor map requested, display both */ 2643 if (!info_threads && !info_map) { 2644 info_threads = true; 2645 info_map = true; 2646 } 2647 2648 /* recycling report_lock_ops */ 2649 trace_handler = &report_lock_ops; 2650 rc = __cmd_report(true); 2651 } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) { 2652 trace_handler = &contention_lock_ops; 2653 sort_key = "wait_total"; 2654 output_fields = 
"contended,wait_total,wait_max,avg_wait"; 2655 2656 #ifndef HAVE_BPF_SKEL 2657 set_option_nobuild(contention_options, 'b', "use-bpf", 2658 "no BUILD_BPF_SKEL=1", false); 2659 #endif 2660 if (argc) { 2661 argc = parse_options(argc, argv, contention_options, 2662 contention_usage, 0); 2663 } 2664 2665 if (check_lock_contention_options(contention_options, 2666 contention_usage) < 0) 2667 return -1; 2668 2669 rc = __cmd_contention(argc, argv); 2670 } else { 2671 usage_with_options(lock_usage, lock_options); 2672 } 2673 2674 /* free usage string allocated by parse_options_subcommand */ 2675 free((void *)lock_usage[0]); 2676 2677 zfree(&lockhash_table); 2678 return rc; 2679 } 2680