// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/cgroup.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>
#include <ctype.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
static FILE *lock_output;

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}
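
/*
 * thread_stat_findnew starts out pointing at thread_stat_findnew_first():
 * the very first call links its node directly at the empty rb-tree root,
 * then redirects the function pointer to thread_stat_findnew_after_first()
 * so that all later lookups take the regular find-or-insert path above.
 */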

/* build simple key functions: return whether `one` is bigger than `two` */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
				       struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;

	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the value specified by the user;
	 * it should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char *name;
	/* header: the string printed on the header line */
	const char *header;
	/* len: the printing width of the field */
	int len;
	/* key: a pointer to a function comparing two lock stats, for sorting */
	int (*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function printing a given lock stat */
	void (*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry linking this key into lock_keys */
	struct list_head list;
};

static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600,	"h " },
		{ 1e9 * 60,	"m " },
		{ 1e9,		"s " },
		{ 1e6,		"ms" },
		{ 1e3,		"us" },
		{ 0,		NULL },
	};

	/* for CSV output */
	if (len == 0) {
		fprintf(lock_output, "%llu", nsec);
		return;
	}

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}


static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
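
/*
 * As an illustration, DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10)
 * expands to:
 *
 *	{ "acquired", "acquired", 10,
 *	  lock_stat_key_nr_acquired, lock_stat_key_print_nr_acquired, {} }
 *
 * wiring the comparator and printer generated by SINGLE_KEY()/PRINT_KEY()
 * above to the user-visible field name.
 */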

static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double linking */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}
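
/*
 * For example, with -F "contended,wait_total" setup_output_field()
 * re-initializes every key's list node and links just those two keys
 * onto lock_keys, in the order given; select_key() then makes sure the
 * sort key is linked as well, so it always appears in the output.
 */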

static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	if (combine_locks && st->combined)
		return;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* return the leftmost element of result, and erase it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	case LOCK_AGGR_CGROUP:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}
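
/*
 * The handlers below drive a small per-thread, per-lock state machine
 * (struct lock_seq_stat): UNINITIALIZED/RELEASED -> ACQUIRING ->
 * (CONTENDED ->) ACQUIRED -> RELEASED, with READ_ACQUIRED counting
 * nested readers.  An event arriving out of order marks the lock stat
 * as broken, bumps bad_hist[] and drops the sequence.
 */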

static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
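
/*
 * The contended wait is the gap between the contended and acquired
 * events: sample->time - seq->prev_event_time feeds wait_time_total
 * and the min/max, and avg_wait_time is recomputed as the total
 * divided by nr_contended on each update.
 */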

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with task names later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
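
/*
 * In LOCK_AGGR_CALLER mode this hash is the aggregation key: the first
 * stack_skip entries and known lock functions are ignored, and the
 * remaining ips are folded together with hash_long(), so contention
 * from the same call path lands in the same lock_stat regardless of
 * the lock address.
 */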

static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_CGROUP:
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);

		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
			free(callstack);
			return 0;
		}

		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * A nested contention_begin can happen with mutex
		 * spinning; keep the original contention begin event
		 * and ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock-oriented handlers */
/* TODO: handlers for CPU-oriented and thread-oriented modes */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler *trace_handler;
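
/*
 * contention_lock_ops fills in only the begin/end handlers, so with
 * the contention subcommand the wrappers below fall through and
 * return 0 for the lockdep tracepoints; report mode installs all six.
 */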

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* Output for debug; this has to be removed */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	fprintf(lock_output, "histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		fprintf(lock_output, "%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			fprintf(lock_output, "%*s ", key->len, key->header);
		fprintf(lock_output, "\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains the tid of the thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			fprintf(lock_output, "%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off the name to preserve the output layout */
			fprintf(lock_output, "%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			fprintf(lock_output, " ");
		}
		fprintf(lock_output, "\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	fprintf(lock_output, "%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	fprintf(lock_output, "Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static void dump_info(void)
{
	if (info_threads)
		dump_threads();

	if (info_map) {
		if (info_threads)
			fputc('\n', lock_output);
		dump_map();
	}
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};

static int process_event_update(const struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(const struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}
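
/*
 * lock_type_table maps the LCB_F_* flag bits carried by the contention
 * tracepoints to human-readable names: flags_name encodes type plus
 * access (e.g. "rwsem:R"), lock_name just the type (e.g. "rwsem").
 * Lookups mask with LCB_F_TYPE_MASK first.
 */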
static const struct {
	unsigned int flags;
	/*
	 * Name of the lock flags (access), with delimiter ':'.
	 * For example, rwsem:R or rwsem:W.
	 */
	const char *flags_name;
	/* Name of the lock (type), for example, rwlock or rwsem. */
	const char *lock_name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
};

static const char *get_type_flags_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].flags_name;
	}
	return "unknown";
}

static const char *get_type_lock_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].lock_name;
	}
	return "unknown";
}

static void lock_filter_finish(void)
{
	zfree(&filters.types);
	filters.nr_types = 0;

	zfree(&filters.addrs);
	filters.nr_addrs = 0;

	for (int i = 0; i < filters.nr_syms; i++)
		free(filters.syms[i]);

	zfree(&filters.syms);
	filters.nr_syms = 0;

	zfree(&filters.cgrps);
	filters.nr_cgrps = 0;

	for (int i = 0; i < filters.nr_slabs; i++)
		free(filters.slabs[i]);

	zfree(&filters.slabs);
	filters.nr_slabs = 0;
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, " %10s %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
		if (verbose > 0)
			fprintf(lock_output, "%s %s", sep, "stacktrace");
		fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header(void)
{
	if (!quiet) {
		if (symbol_conf.field_sep)
			print_header_csv(symbol_conf.field_sep);
		else
			print_header_stdio();
	}
}

static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, " ");
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name);
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, " %10d %s\n",
			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
			st->name, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
		}
	}
}

static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
				const char *sep)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, "%s ", sep);
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
		if (verbose <= 0)
			fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, "%d%s %s\n", pid, sep,
			pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
			st->name, sep, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long)ip, buf);
		}
		fprintf(lock_output, "\n");
	}
}

static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
{
	if (symbol_conf.field_sep)
		print_lock_stat_csv(con, st, symbol_conf.field_sep);
	else
		print_lock_stat_stdio(con, st);
}

static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
	/* Output for debug; this has to be removed */
	int broken = fails->task + fails->stack + fails->time + fails->data;

	if (!use_bpf)
		print_bad_events(bad, total);

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	total += broken;
	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

	fprintf(lock_output, "histogram of failure reasons\n");
	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
			     const char *sep)
{
	/* Output for debug; this has to be removed */
	if (use_bpf)
		bad = fails->task + fails->stack + fails->time + fails->data;

	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
		return;

	total += bad;
	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

	if (use_bpf) {
		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
	} else {
		int i;
		const char *name[4] = { "acquire", "acquired", "contended", "release" };

		for (i = 0; i < BROKEN_MAX; i++)
			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
	}
	fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
	if (symbol_conf.field_sep)
		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
	else
		print_footer_stdio(total, bad, fails);
}
static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}

static bool force;

static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr		= perf_event__process_attr;
	eops.event_update	= process_event_update;
	eops.sample		= process_sample_event;
	eops.comm		= perf_event__process_comm;
	eops.mmap		= perf_event__process_mmap;
	eops.namespaces		= perf_event__process_namespaces;
	eops.tracing_data	= perf_event__process_tracing_data;
	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for the info subcommand */
		dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

static void sighandler(int sig __maybe_unused)
{
}

static int check_lock_contention_options(const struct option *options,
					 const char * const *usage)
{
	if (show_thread_stats && show_lock_addrs) {
		pr_err("Cannot use thread and addr mode together\n");
		parse_options_usage(usage, options, "threads", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_owner && !use_bpf) {
		pr_err("Lock owners are available only with BPF\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_owner && show_lock_addrs) {
		pr_err("Cannot use owner and addr mode together\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && !use_bpf) {
		pr_err("Cgroups are available only with BPF\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_cgroups && show_lock_addrs) {
		pr_err("Cannot use cgroup and addr mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && show_thread_stats) {
		pr_err("Cannot use cgroup and thread mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "threads", 0);
		return -1;
	}

	if (symbol_conf.field_sep) {
		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
			pr_err("Cannot use a separator that is already used\n");
			parse_options_usage(usage, options, "x", 1);
			return -1;
		}
	}

	if (show_lock_owner)
		show_thread_stats = true;

	return 0;
}
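
/*
 * __cmd_contention() runs in one of two modes: with use_bpf set (the
 * --use-bpf option referenced above) it opens a live session
 * (perf_session__new(NULL, ...)), attaches the BPF collector and waits
 * for a signal; otherwise it reads perf.data and replays the
 * contention tracepoints through the same handlers.
 */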
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
		.cgroups = RB_ROOT,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr		= perf_event__process_attr;
	eops.event_update	= process_event_update;
	eops.sample		= process_sample_event;
	eops.comm		= perf_event__process_comm;
	eops.mmap		= perf_event__process_mmap;
	eops.tracing_data	= perf_event__process_tracing_data;

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR :
		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;

	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		err = lock_contention_prepare(&con);
		if (err < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
							   contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	err = setup_output_field(true, output_fields);
	if (err) {
		pr_err("Failed to set up output fields\n");
		goto out_delete;
	}

	err = select_key(true);
	if (err)
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}


static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			? lock_tracepoints[j].name
			: contention_tracepoints[j].name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}
static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}

static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * A single lock name can map to multiple flags.
		 * Replace the alias `pcpu-sem` with the actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}
	}

	free(s);
	return 0;
}
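
/*
 * Helpers for -L/--lock-filter.  Each comma- or space-separated token
 * is tried as a hex address first; a leading '&' selects a slab cache
 * by name; anything else is kept as a symbol name to be resolved
 * against kernel symbols later.
 */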
static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

static bool add_lock_slab(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);	/* do not leak the name on realloc failure */
		return false;
	}

	tmp[filters.nr_slabs++] = sym;
	filters.slabs = tmp;
	return true;
}

static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		addr = strtoul(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		if (*tok == '&') {
			if (!add_lock_slab(tok + 1)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols.  Save the
		 * symbols in a separate list and resolve them to addresses
		 * later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

static int parse_output(const struct option *opt, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (str == NULL)
		return -1;

	lock_output = fopen(str, "w");
	if (lock_output == NULL) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}

static bool add_lock_cgroup(char *name)
{
	u64 *tmp;
	struct cgroup *cgrp;

	cgrp = cgroup__new(name, /*do_open=*/false);
	if (cgrp == NULL) {
		pr_err("Failed to create cgroup: %s\n", name);
		return false;
	}

	if (read_cgroup_id(cgrp) < 0) {
		pr_err("Failed to read cgroup id for %s\n", name);
		cgroup__put(cgrp);
		return false;
	}

	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		cgroup__put(cgrp);	/* drop the reference on failure too */
		return false;
	}

	tmp[filters.nr_cgrps++] = cgrp->id;
	filters.cgrps = tmp;
	cgroup__put(cgrp);
	return true;
}

static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
			       int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_cgroup(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}
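
/*
 * Entry point for 'perf lock'.  Typical invocations:
 *
 *   perf lock record -- <workload>	# record lock events
 *   perf lock report -k wait_total	# analyze a perf.data file
 *   perf lock contention -b -E 16	# trace live with BPF
 *   perf lock info -t			# dump the thread list
 */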
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump the thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "dump the map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack depth to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
		     "Filter specific type of locks", parse_lock_type),
	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
		     "Filter specific address/symbol of locks", parse_lock_addr),
	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
		     "Filter specific function in the callstack", parse_call_stack),
	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
			   "print result in CSV format with custom separator"),
	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
		     "Filter specific cgroups", parse_cgroup_filter),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);
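
	/*
	 * "record", "report" and "contention" may be abbreviated to a
	 * prefix of at least three characters (e.g. "perf lock con");
	 * "script" and "info" must match exactly.
	 */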
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}

		/* If neither threads nor map requested, display both */
		if (!info_threads && !info_map) {
			info_threads = true;
			info_map = true;
		}

		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)lock_usage[0]);

	zfree(&lockhash_table);
	return rc;
}