1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include "builtin.h" 5 #include "perf.h" 6 7 #include "util/evlist.h" // for struct evsel_str_handler 8 #include "util/evsel.h" 9 #include "util/symbol.h" 10 #include "util/thread.h" 11 #include "util/header.h" 12 #include "util/target.h" 13 #include "util/cgroup.h" 14 #include "util/callchain.h" 15 #include "util/lock-contention.h" 16 #include "util/bpf_skel/lock_data.h" 17 18 #include <subcmd/pager.h> 19 #include <subcmd/parse-options.h> 20 #include "util/trace-event.h" 21 #include "util/tracepoint.h" 22 23 #include "util/debug.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/data.h" 27 #include "util/string2.h" 28 #include "util/map.h" 29 #include "util/util.h" 30 31 #include <stdio.h> 32 #include <sys/types.h> 33 #include <sys/prctl.h> 34 #include <semaphore.h> 35 #include <math.h> 36 #include <limits.h> 37 #include <ctype.h> 38 39 #include <linux/list.h> 40 #include <linux/hash.h> 41 #include <linux/kernel.h> 42 #include <linux/zalloc.h> 43 #include <linux/err.h> 44 #include <linux/stringify.h> 45 46 static struct perf_session *session; 47 static struct target target; 48 49 static struct rb_root thread_stats; 50 51 static bool combine_locks; 52 static bool show_thread_stats; 53 static bool show_lock_addrs; 54 static bool show_lock_owner; 55 static bool show_lock_cgroups; 56 static bool use_bpf; 57 static unsigned long bpf_map_entries = MAX_ENTRIES; 58 static int max_stack_depth = CONTENTION_STACK_DEPTH; 59 static int stack_skip = CONTENTION_STACK_SKIP; 60 static int print_nr_entries = INT_MAX / 2; 61 static const char *output_name = NULL; 62 static FILE *lock_output; 63 64 static struct lock_filter filters; 65 66 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; 67 68 static struct thread_stat *thread_stat_find(u32 tid) 69 { 70 struct rb_node *node; 71 struct thread_stat *st; 72 73 node = thread_stats.rb_node; 74 while (node) { 75 st = container_of(node, struct thread_stat, rb); 76 if (st->tid == tid) 77 return st; 78 else if (tid < st->tid) 79 node = node->rb_left; 80 else 81 node = node->rb_right; 82 } 83 84 return NULL; 85 } 86 87 static void thread_stat_insert(struct thread_stat *new) 88 { 89 struct rb_node **rb = &thread_stats.rb_node; 90 struct rb_node *parent = NULL; 91 struct thread_stat *p; 92 93 while (*rb) { 94 p = container_of(*rb, struct thread_stat, rb); 95 parent = *rb; 96 97 if (new->tid < p->tid) 98 rb = &(*rb)->rb_left; 99 else if (new->tid > p->tid) 100 rb = &(*rb)->rb_right; 101 else 102 BUG_ON("inserting invalid thread_stat\n"); 103 } 104 105 rb_link_node(&new->rb, parent, rb); 106 rb_insert_color(&new->rb, &thread_stats); 107 } 108 109 static struct thread_stat *thread_stat_findnew_after_first(u32 tid) 110 { 111 struct thread_stat *st; 112 113 st = thread_stat_find(tid); 114 if (st) 115 return st; 116 117 st = zalloc(sizeof(struct thread_stat)); 118 if (!st) { 119 pr_err("memory allocation failed\n"); 120 return NULL; 121 } 122 123 st->tid = tid; 124 INIT_LIST_HEAD(&st->seq_list); 125 126 thread_stat_insert(st); 127 128 return st; 129 } 130 131 static struct thread_stat *thread_stat_findnew_first(u32 tid); 132 static struct thread_stat *(*thread_stat_findnew)(u32 tid) = 133 thread_stat_findnew_first; 134 135 static struct thread_stat *thread_stat_findnew_first(u32 tid) 136 { 137 struct thread_stat *st; 138 139 st = zalloc(sizeof(struct thread_stat)); 140 if (!st) { 141 pr_err("memory allocation failed\n"); 142 return NULL; 143 } 144 st->tid = tid; 145 INIT_LIST_HEAD(&st->seq_list); 146 147 rb_link_node(&st->rb, NULL, &thread_stats.rb_node); 148 rb_insert_color(&st->rb, &thread_stats); 149 150 thread_stat_findnew = thread_stat_findnew_after_first; 151 return st; 152 } 153 154 /* build simple key function one is bigger than two */ 155 #define SINGLE_KEY(member) \ 156 static int lock_stat_key_ ## member(struct lock_stat *one, \ 157 struct lock_stat *two) \ 158 { \ 159 return one->member > two->member; \ 160 } 161 162 SINGLE_KEY(nr_acquired) 163 SINGLE_KEY(nr_contended) 164 SINGLE_KEY(avg_wait_time) 165 SINGLE_KEY(wait_time_total) 166 SINGLE_KEY(wait_time_max) 167 168 static int lock_stat_key_wait_time_min(struct lock_stat *one, 169 struct lock_stat *two) 170 { 171 u64 s1 = one->wait_time_min; 172 u64 s2 = two->wait_time_min; 173 if (s1 == ULLONG_MAX) 174 s1 = 0; 175 if (s2 == ULLONG_MAX) 176 s2 = 0; 177 return s1 > s2; 178 } 179 180 struct lock_key { 181 /* 182 * name: the value for specify by user 183 * this should be simpler than raw name of member 184 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total 185 */ 186 const char *name; 187 /* header: the string printed on the header line */ 188 const char *header; 189 /* len: the printing width of the field */ 190 int len; 191 /* key: a pointer to function to compare two lock stats for sorting */ 192 int (*key)(struct lock_stat*, struct lock_stat*); 193 /* print: a pointer to function to print a given lock stats */ 194 void (*print)(struct lock_key*, struct lock_stat*); 195 /* list: list entry to link this */ 196 struct list_head list; 197 }; 198 199 static void lock_stat_key_print_time(unsigned long long nsec, int len) 200 { 201 static const struct { 202 float base; 203 const char *unit; 204 } table[] = { 205 { 1e9 * 3600, "h " }, 206 { 1e9 * 60, "m " }, 207 { 1e9, "s " }, 208 { 1e6, "ms" }, 209 { 1e3, "us" }, 210 { 0, NULL }, 211 }; 212 213 /* for CSV output */ 214 if (len == 0) { 215 fprintf(lock_output, "%llu", nsec); 216 return; 217 } 218 219 for (int i = 0; table[i].unit; i++) { 220 if (nsec < table[i].base) 221 continue; 222 223 fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); 224 return; 225 } 226 227 fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns"); 228 } 229 230 #define PRINT_KEY(member) \ 231 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 232 struct lock_stat *ls) \ 233 { \ 234 fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\ 235 } 236 237 #define PRINT_TIME(member) \ 238 static void lock_stat_key_print_ ## member(struct lock_key *key, \ 239 struct lock_stat *ls) \ 240 { \ 241 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ 242 } 243 244 PRINT_KEY(nr_acquired) 245 PRINT_KEY(nr_contended) 246 PRINT_TIME(avg_wait_time) 247 PRINT_TIME(wait_time_total) 248 PRINT_TIME(wait_time_max) 249 250 static void lock_stat_key_print_wait_time_min(struct lock_key *key, 251 struct lock_stat *ls) 252 { 253 u64 wait_time = ls->wait_time_min; 254 255 if (wait_time == ULLONG_MAX) 256 wait_time = 0; 257 258 lock_stat_key_print_time(wait_time, key->len); 259 } 260 261 262 static const char *sort_key = "acquired"; 263 264 static int (*compare)(struct lock_stat *, struct lock_stat *); 265 266 static struct rb_root sorted; /* place to store intermediate data */ 267 static struct rb_root result; /* place to store sorted data */ 268 269 static LIST_HEAD(lock_keys); 270 static const char *output_fields; 271 272 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \ 273 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} } 274 static struct lock_key report_keys[] = { 275 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), 276 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 277 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 278 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 279 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 280 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 281 282 /* extra comparisons much complicated should be here */ 283 { } 284 }; 285 286 static struct lock_key contention_keys[] = { 287 DEF_KEY_LOCK(contended, "contended", nr_contended, 10), 288 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), 289 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), 290 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), 291 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), 292 293 /* extra comparisons much complicated should be here */ 294 { } 295 }; 296 297 static int select_key(bool contention) 298 { 299 int i; 300 struct lock_key *keys = report_keys; 301 302 if (contention) 303 keys = contention_keys; 304 305 for (i = 0; keys[i].name; i++) { 306 if (!strcmp(keys[i].name, sort_key)) { 307 compare = keys[i].key; 308 309 /* selected key should be in the output fields */ 310 if (list_empty(&keys[i].list)) 311 list_add_tail(&keys[i].list, &lock_keys); 312 313 return 0; 314 } 315 } 316 317 pr_err("Unknown compare key: %s\n", sort_key); 318 return -1; 319 } 320 321 static int add_output_field(bool contention, char *name) 322 { 323 int i; 324 struct lock_key *keys = report_keys; 325 326 if (contention) 327 keys = contention_keys; 328 329 for (i = 0; keys[i].name; i++) { 330 if (strcmp(keys[i].name, name)) 331 continue; 332 333 /* prevent double link */ 334 if (list_empty(&keys[i].list)) 335 list_add_tail(&keys[i].list, &lock_keys); 336 337 return 0; 338 } 339 340 pr_err("Unknown output field: %s\n", name); 341 return -1; 342 } 343 344 static int setup_output_field(bool contention, const char *str) 345 { 346 char *tok, *tmp, *orig; 347 int i, ret = 0; 348 struct lock_key *keys = report_keys; 349 350 if (contention) 351 keys = contention_keys; 352 353 /* no output field given: use all of them */ 354 if (str == NULL) { 355 for (i = 0; keys[i].name; i++) 356 list_add_tail(&keys[i].list, &lock_keys); 357 return 0; 358 } 359 360 for (i = 0; keys[i].name; i++) 361 INIT_LIST_HEAD(&keys[i].list); 362 363 orig = tmp = strdup(str); 364 if (orig == NULL) 365 return -ENOMEM; 366 367 while ((tok = strsep(&tmp, ",")) != NULL){ 368 ret = add_output_field(contention, tok); 369 if (ret < 0) 370 break; 371 } 372 free(orig); 373 374 return ret; 375 } 376 377 static void combine_lock_stats(struct lock_stat *st) 378 { 379 struct rb_node **rb = &sorted.rb_node; 380 struct rb_node *parent = NULL; 381 struct lock_stat *p; 382 int ret; 383 384 while (*rb) { 385 p = container_of(*rb, struct lock_stat, rb); 386 parent = *rb; 387 388 if (st->name && p->name) 389 ret = strcmp(st->name, p->name); 390 else 391 ret = !!st->name - !!p->name; 392 393 if (ret == 0) { 394 p->nr_acquired += st->nr_acquired; 395 p->nr_contended += st->nr_contended; 396 p->wait_time_total += st->wait_time_total; 397 398 if (p->nr_contended) 399 p->avg_wait_time = p->wait_time_total / p->nr_contended; 400 401 if (p->wait_time_min > st->wait_time_min) 402 p->wait_time_min = st->wait_time_min; 403 if (p->wait_time_max < st->wait_time_max) 404 p->wait_time_max = st->wait_time_max; 405 406 p->broken |= st->broken; 407 st->combined = 1; 408 return; 409 } 410 411 if (ret < 0) 412 rb = &(*rb)->rb_left; 413 else 414 rb = &(*rb)->rb_right; 415 } 416 417 rb_link_node(&st->rb, parent, rb); 418 rb_insert_color(&st->rb, &sorted); 419 } 420 421 static void insert_to_result(struct lock_stat *st, 422 int (*bigger)(struct lock_stat *, struct lock_stat *)) 423 { 424 struct rb_node **rb = &result.rb_node; 425 struct rb_node *parent = NULL; 426 struct lock_stat *p; 427 428 if (combine_locks && st->combined) 429 return; 430 431 while (*rb) { 432 p = container_of(*rb, struct lock_stat, rb); 433 parent = *rb; 434 435 if (bigger(st, p)) 436 rb = &(*rb)->rb_left; 437 else 438 rb = &(*rb)->rb_right; 439 } 440 441 rb_link_node(&st->rb, parent, rb); 442 rb_insert_color(&st->rb, &result); 443 } 444 445 /* returns left most element of result, and erase it */ 446 static struct lock_stat *pop_from_result(void) 447 { 448 struct rb_node *node = result.rb_node; 449 450 if (!node) 451 return NULL; 452 453 while (node->rb_left) 454 node = node->rb_left; 455 456 rb_erase(node, &result); 457 return container_of(node, struct lock_stat, rb); 458 } 459 460 struct trace_lock_handler { 461 /* it's used on CONFIG_LOCKDEP */ 462 int (*acquire_event)(struct evsel *evsel, 463 struct perf_sample *sample); 464 465 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 466 int (*acquired_event)(struct evsel *evsel, 467 struct perf_sample *sample); 468 469 /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ 470 int (*contended_event)(struct evsel *evsel, 471 struct perf_sample *sample); 472 473 /* it's used on CONFIG_LOCKDEP */ 474 int (*release_event)(struct evsel *evsel, 475 struct perf_sample *sample); 476 477 /* it's used when CONFIG_LOCKDEP is off */ 478 int (*contention_begin_event)(struct evsel *evsel, 479 struct perf_sample *sample); 480 481 /* it's used when CONFIG_LOCKDEP is off */ 482 int (*contention_end_event)(struct evsel *evsel, 483 struct perf_sample *sample); 484 }; 485 486 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) 487 { 488 struct lock_seq_stat *seq; 489 490 list_for_each_entry(seq, &ts->seq_list, list) { 491 if (seq->addr == addr) 492 return seq; 493 } 494 495 seq = zalloc(sizeof(struct lock_seq_stat)); 496 if (!seq) { 497 pr_err("memory allocation failed\n"); 498 return NULL; 499 } 500 seq->state = SEQ_STATE_UNINITIALIZED; 501 seq->addr = addr; 502 503 list_add(&seq->list, &ts->seq_list); 504 return seq; 505 } 506 507 enum broken_state { 508 BROKEN_ACQUIRE, 509 BROKEN_ACQUIRED, 510 BROKEN_CONTENDED, 511 BROKEN_RELEASE, 512 BROKEN_MAX, 513 }; 514 515 static int bad_hist[BROKEN_MAX]; 516 517 enum acquire_flags { 518 TRY_LOCK = 1, 519 READ_LOCK = 2, 520 }; 521 522 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid) 523 { 524 switch (aggr_mode) { 525 case LOCK_AGGR_ADDR: 526 *key = addr; 527 break; 528 case LOCK_AGGR_TASK: 529 *key = tid; 530 break; 531 case LOCK_AGGR_CALLER: 532 case LOCK_AGGR_CGROUP: 533 default: 534 pr_err("Invalid aggregation mode: %d\n", aggr_mode); 535 return -EINVAL; 536 } 537 return 0; 538 } 539 540 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample); 541 542 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel, 543 struct perf_sample *sample) 544 { 545 if (aggr_mode == LOCK_AGGR_CALLER) { 546 *key = callchain_id(evsel, sample); 547 return 0; 548 } 549 return get_key_by_aggr_mode_simple(key, addr, sample->tid); 550 } 551 552 static int report_lock_acquire_event(struct evsel *evsel, 553 struct perf_sample *sample) 554 { 555 struct lock_stat *ls; 556 struct thread_stat *ts; 557 struct lock_seq_stat *seq; 558 const char *name = evsel__strval(evsel, sample, "name"); 559 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 560 int flag = evsel__intval(evsel, sample, "flags"); 561 u64 key; 562 int ret; 563 564 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 565 if (ret < 0) 566 return ret; 567 568 ls = lock_stat_findnew(key, name, 0); 569 if (!ls) 570 return -ENOMEM; 571 572 ts = thread_stat_findnew(sample->tid); 573 if (!ts) 574 return -ENOMEM; 575 576 seq = get_seq(ts, addr); 577 if (!seq) 578 return -ENOMEM; 579 580 switch (seq->state) { 581 case SEQ_STATE_UNINITIALIZED: 582 case SEQ_STATE_RELEASED: 583 if (!flag) { 584 seq->state = SEQ_STATE_ACQUIRING; 585 } else { 586 if (flag & TRY_LOCK) 587 ls->nr_trylock++; 588 if (flag & READ_LOCK) 589 ls->nr_readlock++; 590 seq->state = SEQ_STATE_READ_ACQUIRED; 591 seq->read_count = 1; 592 ls->nr_acquired++; 593 } 594 break; 595 case SEQ_STATE_READ_ACQUIRED: 596 if (flag & READ_LOCK) { 597 seq->read_count++; 598 ls->nr_acquired++; 599 goto end; 600 } else { 601 goto broken; 602 } 603 break; 604 case SEQ_STATE_ACQUIRED: 605 case SEQ_STATE_ACQUIRING: 606 case SEQ_STATE_CONTENDED: 607 broken: 608 /* broken lock sequence */ 609 if (!ls->broken) { 610 ls->broken = 1; 611 bad_hist[BROKEN_ACQUIRE]++; 612 } 613 list_del_init(&seq->list); 614 free(seq); 615 goto end; 616 default: 617 BUG_ON("Unknown state of lock sequence found!\n"); 618 break; 619 } 620 621 ls->nr_acquire++; 622 seq->prev_event_time = sample->time; 623 end: 624 return 0; 625 } 626 627 static int report_lock_acquired_event(struct evsel *evsel, 628 struct perf_sample *sample) 629 { 630 struct lock_stat *ls; 631 struct thread_stat *ts; 632 struct lock_seq_stat *seq; 633 u64 contended_term; 634 const char *name = evsel__strval(evsel, sample, "name"); 635 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 636 u64 key; 637 int ret; 638 639 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 640 if (ret < 0) 641 return ret; 642 643 ls = lock_stat_findnew(key, name, 0); 644 if (!ls) 645 return -ENOMEM; 646 647 ts = thread_stat_findnew(sample->tid); 648 if (!ts) 649 return -ENOMEM; 650 651 seq = get_seq(ts, addr); 652 if (!seq) 653 return -ENOMEM; 654 655 switch (seq->state) { 656 case SEQ_STATE_UNINITIALIZED: 657 /* orphan event, do nothing */ 658 return 0; 659 case SEQ_STATE_ACQUIRING: 660 break; 661 case SEQ_STATE_CONTENDED: 662 contended_term = sample->time - seq->prev_event_time; 663 ls->wait_time_total += contended_term; 664 if (contended_term < ls->wait_time_min) 665 ls->wait_time_min = contended_term; 666 if (ls->wait_time_max < contended_term) 667 ls->wait_time_max = contended_term; 668 break; 669 case SEQ_STATE_RELEASED: 670 case SEQ_STATE_ACQUIRED: 671 case SEQ_STATE_READ_ACQUIRED: 672 /* broken lock sequence */ 673 if (!ls->broken) { 674 ls->broken = 1; 675 bad_hist[BROKEN_ACQUIRED]++; 676 } 677 list_del_init(&seq->list); 678 free(seq); 679 goto end; 680 default: 681 BUG_ON("Unknown state of lock sequence found!\n"); 682 break; 683 } 684 685 seq->state = SEQ_STATE_ACQUIRED; 686 ls->nr_acquired++; 687 ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0; 688 seq->prev_event_time = sample->time; 689 end: 690 return 0; 691 } 692 693 static int report_lock_contended_event(struct evsel *evsel, 694 struct perf_sample *sample) 695 { 696 struct lock_stat *ls; 697 struct thread_stat *ts; 698 struct lock_seq_stat *seq; 699 const char *name = evsel__strval(evsel, sample, "name"); 700 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 701 u64 key; 702 int ret; 703 704 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 705 if (ret < 0) 706 return ret; 707 708 ls = lock_stat_findnew(key, name, 0); 709 if (!ls) 710 return -ENOMEM; 711 712 ts = thread_stat_findnew(sample->tid); 713 if (!ts) 714 return -ENOMEM; 715 716 seq = get_seq(ts, addr); 717 if (!seq) 718 return -ENOMEM; 719 720 switch (seq->state) { 721 case SEQ_STATE_UNINITIALIZED: 722 /* orphan event, do nothing */ 723 return 0; 724 case SEQ_STATE_ACQUIRING: 725 break; 726 case SEQ_STATE_RELEASED: 727 case SEQ_STATE_ACQUIRED: 728 case SEQ_STATE_READ_ACQUIRED: 729 case SEQ_STATE_CONTENDED: 730 /* broken lock sequence */ 731 if (!ls->broken) { 732 ls->broken = 1; 733 bad_hist[BROKEN_CONTENDED]++; 734 } 735 list_del_init(&seq->list); 736 free(seq); 737 goto end; 738 default: 739 BUG_ON("Unknown state of lock sequence found!\n"); 740 break; 741 } 742 743 seq->state = SEQ_STATE_CONTENDED; 744 ls->nr_contended++; 745 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; 746 seq->prev_event_time = sample->time; 747 end: 748 return 0; 749 } 750 751 static int report_lock_release_event(struct evsel *evsel, 752 struct perf_sample *sample) 753 { 754 struct lock_stat *ls; 755 struct thread_stat *ts; 756 struct lock_seq_stat *seq; 757 const char *name = evsel__strval(evsel, sample, "name"); 758 u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); 759 u64 key; 760 int ret; 761 762 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid); 763 if (ret < 0) 764 return ret; 765 766 ls = lock_stat_findnew(key, name, 0); 767 if (!ls) 768 return -ENOMEM; 769 770 ts = thread_stat_findnew(sample->tid); 771 if (!ts) 772 return -ENOMEM; 773 774 seq = get_seq(ts, addr); 775 if (!seq) 776 return -ENOMEM; 777 778 switch (seq->state) { 779 case SEQ_STATE_UNINITIALIZED: 780 goto end; 781 case SEQ_STATE_ACQUIRED: 782 break; 783 case SEQ_STATE_READ_ACQUIRED: 784 seq->read_count--; 785 BUG_ON(seq->read_count < 0); 786 if (seq->read_count) { 787 ls->nr_release++; 788 goto end; 789 } 790 break; 791 case SEQ_STATE_ACQUIRING: 792 case SEQ_STATE_CONTENDED: 793 case SEQ_STATE_RELEASED: 794 /* broken lock sequence */ 795 if (!ls->broken) { 796 ls->broken = 1; 797 bad_hist[BROKEN_RELEASE]++; 798 } 799 goto free_seq; 800 default: 801 BUG_ON("Unknown state of lock sequence found!\n"); 802 break; 803 } 804 805 ls->nr_release++; 806 free_seq: 807 list_del_init(&seq->list); 808 free(seq); 809 end: 810 return 0; 811 } 812 813 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, 814 char *buf, int size) 815 { 816 u64 offset; 817 818 if (map == NULL || sym == NULL) { 819 buf[0] = '\0'; 820 return 0; 821 } 822 823 offset = map__map_ip(map, ip) - sym->start; 824 825 if (offset) 826 return scnprintf(buf, size, "%s+%#lx", sym->name, offset); 827 else 828 return strlcpy(buf, sym->name, size); 829 } 830 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, 831 char *buf, int size) 832 { 833 struct thread *thread; 834 struct callchain_cursor *cursor; 835 struct machine *machine = &session->machines.host; 836 struct symbol *sym; 837 int skip = 0; 838 int ret; 839 840 /* lock names will be replaced to task name later */ 841 if (show_thread_stats) 842 return -1; 843 844 thread = machine__findnew_thread(machine, -1, sample->pid); 845 if (thread == NULL) 846 return -1; 847 848 cursor = get_tls_callchain_cursor(); 849 850 /* use caller function name from the callchain */ 851 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 852 NULL, NULL, max_stack_depth); 853 if (ret != 0) { 854 thread__put(thread); 855 return -1; 856 } 857 858 callchain_cursor_commit(cursor); 859 thread__put(thread); 860 861 while (true) { 862 struct callchain_cursor_node *node; 863 864 node = callchain_cursor_current(cursor); 865 if (node == NULL) 866 break; 867 868 /* skip first few entries - for lock functions */ 869 if (++skip <= stack_skip) 870 goto next; 871 872 sym = node->ms.sym; 873 if (sym && !machine__is_lock_function(machine, node->ip)) { 874 get_symbol_name_offset(node->ms.map, sym, node->ip, 875 buf, size); 876 return 0; 877 } 878 879 next: 880 callchain_cursor_advance(cursor); 881 } 882 return -1; 883 } 884 885 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) 886 { 887 struct callchain_cursor *cursor; 888 struct machine *machine = &session->machines.host; 889 struct thread *thread; 890 u64 hash = 0; 891 int skip = 0; 892 int ret; 893 894 thread = machine__findnew_thread(machine, -1, sample->pid); 895 if (thread == NULL) 896 return -1; 897 898 cursor = get_tls_callchain_cursor(); 899 /* use caller function name from the callchain */ 900 ret = thread__resolve_callchain(thread, cursor, evsel, sample, 901 NULL, NULL, max_stack_depth); 902 thread__put(thread); 903 904 if (ret != 0) 905 return -1; 906 907 callchain_cursor_commit(cursor); 908 909 while (true) { 910 struct callchain_cursor_node *node; 911 912 node = callchain_cursor_current(cursor); 913 if (node == NULL) 914 break; 915 916 /* skip first few entries - for lock functions */ 917 if (++skip <= stack_skip) 918 goto next; 919 920 if (node->ms.sym && machine__is_lock_function(machine, node->ip)) 921 goto next; 922 923 hash ^= hash_long((unsigned long)node->ip, 64); 924 925 next: 926 callchain_cursor_advance(cursor); 927 } 928 return hash; 929 } 930 931 static u64 *get_callstack(struct perf_sample *sample, int max_stack) 932 { 933 u64 *callstack; 934 u64 i; 935 int c; 936 937 callstack = calloc(max_stack, sizeof(*callstack)); 938 if (callstack == NULL) 939 return NULL; 940 941 for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) { 942 u64 ip = sample->callchain->ips[i]; 943 944 if (ip >= PERF_CONTEXT_MAX) 945 continue; 946 947 callstack[c++] = ip; 948 } 949 return callstack; 950 } 951 952 static int report_lock_contention_begin_event(struct evsel *evsel, 953 struct perf_sample *sample) 954 { 955 struct lock_stat *ls; 956 struct thread_stat *ts; 957 struct lock_seq_stat *seq; 958 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 959 unsigned int flags = evsel__intval(evsel, sample, "flags"); 960 u64 key; 961 int i, ret; 962 static bool kmap_loaded; 963 struct machine *machine = &session->machines.host; 964 struct map *kmap; 965 struct symbol *sym; 966 967 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 968 if (ret < 0) 969 return ret; 970 971 if (!kmap_loaded) { 972 unsigned long *addrs; 973 974 /* make sure it loads the kernel map to find lock symbols */ 975 map__load(machine__kernel_map(machine)); 976 kmap_loaded = true; 977 978 /* convert (kernel) symbols to addresses */ 979 for (i = 0; i < filters.nr_syms; i++) { 980 sym = machine__find_kernel_symbol_by_name(machine, 981 filters.syms[i], 982 &kmap); 983 if (sym == NULL) { 984 pr_warning("ignore unknown symbol: %s\n", 985 filters.syms[i]); 986 continue; 987 } 988 989 addrs = realloc(filters.addrs, 990 (filters.nr_addrs + 1) * sizeof(*addrs)); 991 if (addrs == NULL) { 992 pr_warning("memory allocation failure\n"); 993 return -ENOMEM; 994 } 995 996 addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start); 997 filters.addrs = addrs; 998 } 999 } 1000 1001 ls = lock_stat_find(key); 1002 if (!ls) { 1003 char buf[128]; 1004 const char *name = ""; 1005 1006 switch (aggr_mode) { 1007 case LOCK_AGGR_ADDR: 1008 sym = machine__find_kernel_symbol(machine, key, &kmap); 1009 if (sym) 1010 name = sym->name; 1011 break; 1012 case LOCK_AGGR_CALLER: 1013 name = buf; 1014 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1015 name = "Unknown"; 1016 break; 1017 case LOCK_AGGR_CGROUP: 1018 case LOCK_AGGR_TASK: 1019 default: 1020 break; 1021 } 1022 1023 ls = lock_stat_findnew(key, name, flags); 1024 if (!ls) 1025 return -ENOMEM; 1026 } 1027 1028 if (filters.nr_types) { 1029 bool found = false; 1030 1031 for (i = 0; i < filters.nr_types; i++) { 1032 if (flags == filters.types[i]) { 1033 found = true; 1034 break; 1035 } 1036 } 1037 1038 if (!found) 1039 return 0; 1040 } 1041 1042 if (filters.nr_addrs) { 1043 bool found = false; 1044 1045 for (i = 0; i < filters.nr_addrs; i++) { 1046 if (addr == filters.addrs[i]) { 1047 found = true; 1048 break; 1049 } 1050 } 1051 1052 if (!found) 1053 return 0; 1054 } 1055 1056 if (needs_callstack()) { 1057 u64 *callstack = get_callstack(sample, max_stack_depth); 1058 if (callstack == NULL) 1059 return -ENOMEM; 1060 1061 if (!match_callstack_filter(machine, callstack, max_stack_depth)) { 1062 free(callstack); 1063 return 0; 1064 } 1065 1066 if (ls->callstack == NULL) 1067 ls->callstack = callstack; 1068 else 1069 free(callstack); 1070 } 1071 1072 ts = thread_stat_findnew(sample->tid); 1073 if (!ts) 1074 return -ENOMEM; 1075 1076 seq = get_seq(ts, addr); 1077 if (!seq) 1078 return -ENOMEM; 1079 1080 switch (seq->state) { 1081 case SEQ_STATE_UNINITIALIZED: 1082 case SEQ_STATE_ACQUIRED: 1083 break; 1084 case SEQ_STATE_CONTENDED: 1085 /* 1086 * It can have nested contention begin with mutex spinning, 1087 * then we would use the original contention begin event and 1088 * ignore the second one. 1089 */ 1090 goto end; 1091 case SEQ_STATE_ACQUIRING: 1092 case SEQ_STATE_READ_ACQUIRED: 1093 case SEQ_STATE_RELEASED: 1094 /* broken lock sequence */ 1095 if (!ls->broken) { 1096 ls->broken = 1; 1097 bad_hist[BROKEN_CONTENDED]++; 1098 } 1099 list_del_init(&seq->list); 1100 free(seq); 1101 goto end; 1102 default: 1103 BUG_ON("Unknown state of lock sequence found!\n"); 1104 break; 1105 } 1106 1107 if (seq->state != SEQ_STATE_CONTENDED) { 1108 seq->state = SEQ_STATE_CONTENDED; 1109 seq->prev_event_time = sample->time; 1110 ls->nr_contended++; 1111 } 1112 end: 1113 return 0; 1114 } 1115 1116 static int report_lock_contention_end_event(struct evsel *evsel, 1117 struct perf_sample *sample) 1118 { 1119 struct lock_stat *ls; 1120 struct thread_stat *ts; 1121 struct lock_seq_stat *seq; 1122 u64 contended_term; 1123 u64 addr = evsel__intval(evsel, sample, "lock_addr"); 1124 u64 key; 1125 int ret; 1126 1127 ret = get_key_by_aggr_mode(&key, addr, evsel, sample); 1128 if (ret < 0) 1129 return ret; 1130 1131 ls = lock_stat_find(key); 1132 if (!ls) 1133 return 0; 1134 1135 ts = thread_stat_find(sample->tid); 1136 if (!ts) 1137 return 0; 1138 1139 seq = get_seq(ts, addr); 1140 if (!seq) 1141 return -ENOMEM; 1142 1143 switch (seq->state) { 1144 case SEQ_STATE_UNINITIALIZED: 1145 goto end; 1146 case SEQ_STATE_CONTENDED: 1147 contended_term = sample->time - seq->prev_event_time; 1148 ls->wait_time_total += contended_term; 1149 if (contended_term < ls->wait_time_min) 1150 ls->wait_time_min = contended_term; 1151 if (ls->wait_time_max < contended_term) 1152 ls->wait_time_max = contended_term; 1153 break; 1154 case SEQ_STATE_ACQUIRING: 1155 case SEQ_STATE_ACQUIRED: 1156 case SEQ_STATE_READ_ACQUIRED: 1157 case SEQ_STATE_RELEASED: 1158 /* broken lock sequence */ 1159 if (!ls->broken) { 1160 ls->broken = 1; 1161 bad_hist[BROKEN_ACQUIRED]++; 1162 } 1163 list_del_init(&seq->list); 1164 free(seq); 1165 goto end; 1166 default: 1167 BUG_ON("Unknown state of lock sequence found!\n"); 1168 break; 1169 } 1170 1171 seq->state = SEQ_STATE_ACQUIRED; 1172 ls->nr_acquired++; 1173 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; 1174 end: 1175 return 0; 1176 } 1177 1178 /* lock oriented handlers */ 1179 /* TODO: handlers for CPU oriented, thread oriented */ 1180 static struct trace_lock_handler report_lock_ops = { 1181 .acquire_event = report_lock_acquire_event, 1182 .acquired_event = report_lock_acquired_event, 1183 .contended_event = report_lock_contended_event, 1184 .release_event = report_lock_release_event, 1185 .contention_begin_event = report_lock_contention_begin_event, 1186 .contention_end_event = report_lock_contention_end_event, 1187 }; 1188 1189 static struct trace_lock_handler contention_lock_ops = { 1190 .contention_begin_event = report_lock_contention_begin_event, 1191 .contention_end_event = report_lock_contention_end_event, 1192 }; 1193 1194 1195 static struct trace_lock_handler *trace_handler; 1196 1197 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) 1198 { 1199 if (trace_handler->acquire_event) 1200 return trace_handler->acquire_event(evsel, sample); 1201 return 0; 1202 } 1203 1204 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) 1205 { 1206 if (trace_handler->acquired_event) 1207 return trace_handler->acquired_event(evsel, sample); 1208 return 0; 1209 } 1210 1211 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) 1212 { 1213 if (trace_handler->contended_event) 1214 return trace_handler->contended_event(evsel, sample); 1215 return 0; 1216 } 1217 1218 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) 1219 { 1220 if (trace_handler->release_event) 1221 return trace_handler->release_event(evsel, sample); 1222 return 0; 1223 } 1224 1225 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) 1226 { 1227 if (trace_handler->contention_begin_event) 1228 return trace_handler->contention_begin_event(evsel, sample); 1229 return 0; 1230 } 1231 1232 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) 1233 { 1234 if (trace_handler->contention_end_event) 1235 return trace_handler->contention_end_event(evsel, sample); 1236 return 0; 1237 } 1238 1239 static void print_bad_events(int bad, int total) 1240 { 1241 /* Output for debug, this have to be removed */ 1242 int i; 1243 int broken = 0; 1244 const char *name[4] = 1245 { "acquire", "acquired", "contended", "release" }; 1246 1247 for (i = 0; i < BROKEN_MAX; i++) 1248 broken += bad_hist[i]; 1249 1250 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1251 return; 1252 1253 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1254 fprintf(lock_output, "bad: %d, total: %d\n", bad, total); 1255 fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100); 1256 fprintf(lock_output, "histogram of events caused bad sequence\n"); 1257 for (i = 0; i < BROKEN_MAX; i++) 1258 fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]); 1259 } 1260 1261 /* TODO: various way to print, coloring, nano or milli sec */ 1262 static void print_result(void) 1263 { 1264 struct lock_stat *st; 1265 struct lock_key *key; 1266 char cut_name[20]; 1267 int bad, total, printed; 1268 1269 if (!quiet) { 1270 fprintf(lock_output, "%20s ", "Name"); 1271 list_for_each_entry(key, &lock_keys, list) 1272 fprintf(lock_output, "%*s ", key->len, key->header); 1273 fprintf(lock_output, "\n\n"); 1274 } 1275 1276 bad = total = printed = 0; 1277 while ((st = pop_from_result())) { 1278 total++; 1279 if (st->broken) 1280 bad++; 1281 if (!st->nr_acquired) 1282 continue; 1283 1284 bzero(cut_name, 20); 1285 1286 if (strlen(st->name) < 20) { 1287 /* output raw name */ 1288 const char *name = st->name; 1289 1290 if (show_thread_stats) { 1291 struct thread *t; 1292 1293 /* st->addr contains tid of thread */ 1294 t = perf_session__findnew(session, st->addr); 1295 name = thread__comm_str(t); 1296 } 1297 1298 fprintf(lock_output, "%20s ", name); 1299 } else { 1300 strncpy(cut_name, st->name, 16); 1301 cut_name[16] = '.'; 1302 cut_name[17] = '.'; 1303 cut_name[18] = '.'; 1304 cut_name[19] = '\0'; 1305 /* cut off name for saving output style */ 1306 fprintf(lock_output, "%20s ", cut_name); 1307 } 1308 1309 list_for_each_entry(key, &lock_keys, list) { 1310 key->print(key, st); 1311 fprintf(lock_output, " "); 1312 } 1313 fprintf(lock_output, "\n"); 1314 1315 if (++printed >= print_nr_entries) 1316 break; 1317 } 1318 1319 print_bad_events(bad, total); 1320 } 1321 1322 static bool info_threads, info_map; 1323 1324 static void dump_threads(void) 1325 { 1326 struct thread_stat *st; 1327 struct rb_node *node; 1328 struct thread *t; 1329 1330 fprintf(lock_output, "%10s: comm\n", "Thread ID"); 1331 1332 node = rb_first(&thread_stats); 1333 while (node) { 1334 st = container_of(node, struct thread_stat, rb); 1335 t = perf_session__findnew(session, st->tid); 1336 fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t)); 1337 node = rb_next(node); 1338 thread__put(t); 1339 } 1340 } 1341 1342 static int compare_maps(struct lock_stat *a, struct lock_stat *b) 1343 { 1344 int ret; 1345 1346 if (a->name && b->name) 1347 ret = strcmp(a->name, b->name); 1348 else 1349 ret = !!a->name - !!b->name; 1350 1351 if (!ret) 1352 return a->addr < b->addr; 1353 else 1354 return ret < 0; 1355 } 1356 1357 static void dump_map(void) 1358 { 1359 unsigned int i; 1360 struct lock_stat *st; 1361 1362 fprintf(lock_output, "Address of instance: name of class\n"); 1363 for (i = 0; i < LOCKHASH_SIZE; i++) { 1364 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1365 insert_to_result(st, compare_maps); 1366 } 1367 } 1368 1369 while ((st = pop_from_result())) 1370 fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name); 1371 } 1372 1373 static void dump_info(void) 1374 { 1375 if (info_threads) 1376 dump_threads(); 1377 1378 if (info_map) { 1379 if (info_threads) 1380 fputc('\n', lock_output); 1381 dump_map(); 1382 } 1383 } 1384 1385 static const struct evsel_str_handler lock_tracepoints[] = { 1386 { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ 1387 { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1388 { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ 1389 { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ 1390 }; 1391 1392 static const struct evsel_str_handler contention_tracepoints[] = { 1393 { "lock:contention_begin", evsel__process_contention_begin, }, 1394 { "lock:contention_end", evsel__process_contention_end, }, 1395 }; 1396 1397 static int process_event_update(const struct perf_tool *tool, 1398 union perf_event *event, 1399 struct evlist **pevlist) 1400 { 1401 int ret; 1402 1403 ret = perf_event__process_event_update(tool, event, pevlist); 1404 if (ret < 0) 1405 return ret; 1406 1407 /* this can return -EEXIST since we call it for each evsel */ 1408 perf_session__set_tracepoints_handlers(session, lock_tracepoints); 1409 perf_session__set_tracepoints_handlers(session, contention_tracepoints); 1410 return 0; 1411 } 1412 1413 typedef int (*tracepoint_handler)(struct evsel *evsel, 1414 struct perf_sample *sample); 1415 1416 static int process_sample_event(const struct perf_tool *tool __maybe_unused, 1417 union perf_event *event, 1418 struct perf_sample *sample, 1419 struct evsel *evsel, 1420 struct machine *machine) 1421 { 1422 int err = 0; 1423 struct thread *thread = machine__findnew_thread(machine, sample->pid, 1424 sample->tid); 1425 1426 if (thread == NULL) { 1427 pr_debug("problem processing %d event, skipping it.\n", 1428 event->header.type); 1429 return -1; 1430 } 1431 1432 if (evsel->handler != NULL) { 1433 tracepoint_handler f = evsel->handler; 1434 err = f(evsel, sample); 1435 } 1436 1437 thread__put(thread); 1438 1439 return err; 1440 } 1441 1442 static void combine_result(void) 1443 { 1444 unsigned int i; 1445 struct lock_stat *st; 1446 1447 if (!combine_locks) 1448 return; 1449 1450 for (i = 0; i < LOCKHASH_SIZE; i++) { 1451 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1452 combine_lock_stats(st); 1453 } 1454 } 1455 } 1456 1457 static void sort_result(void) 1458 { 1459 unsigned int i; 1460 struct lock_stat *st; 1461 1462 for (i = 0; i < LOCKHASH_SIZE; i++) { 1463 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { 1464 insert_to_result(st, compare); 1465 } 1466 } 1467 } 1468 1469 static const struct { 1470 unsigned int flags; 1471 const char *str; 1472 const char *name; 1473 } lock_type_table[] = { 1474 { 0, "semaphore", "semaphore" }, 1475 { LCB_F_SPIN, "spinlock", "spinlock" }, 1476 { LCB_F_SPIN | LCB_F_READ, "rwlock:R", "rwlock" }, 1477 { LCB_F_SPIN | LCB_F_WRITE, "rwlock:W", "rwlock" }, 1478 { LCB_F_READ, "rwsem:R", "rwsem" }, 1479 { LCB_F_WRITE, "rwsem:W", "rwsem" }, 1480 { LCB_F_RT, "rt-mutex", "rt-mutex" }, 1481 { LCB_F_RT | LCB_F_READ, "rwlock-rt:R", "rwlock-rt" }, 1482 { LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" }, 1483 { LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" }, 1484 { LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" }, 1485 { LCB_F_MUTEX, "mutex", "mutex" }, 1486 { LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" }, 1487 /* alias for get_type_flag() */ 1488 { LCB_F_MUTEX | LCB_F_SPIN, "mutex-spin", "mutex" }, 1489 }; 1490 1491 static const char *get_type_str(unsigned int flags) 1492 { 1493 flags &= LCB_F_TYPE_MASK; 1494 1495 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1496 if (lock_type_table[i].flags == flags) 1497 return lock_type_table[i].str; 1498 } 1499 return "unknown"; 1500 } 1501 1502 static const char *get_type_name(unsigned int flags) 1503 { 1504 flags &= LCB_F_TYPE_MASK; 1505 1506 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1507 if (lock_type_table[i].flags == flags) 1508 return lock_type_table[i].name; 1509 } 1510 return "unknown"; 1511 } 1512 1513 static unsigned int get_type_flag(const char *str) 1514 { 1515 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1516 if (!strcmp(lock_type_table[i].name, str)) 1517 return lock_type_table[i].flags; 1518 } 1519 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { 1520 if (!strcmp(lock_type_table[i].str, str)) 1521 return lock_type_table[i].flags; 1522 } 1523 return UINT_MAX; 1524 } 1525 1526 static void lock_filter_finish(void) 1527 { 1528 zfree(&filters.types); 1529 filters.nr_types = 0; 1530 1531 zfree(&filters.addrs); 1532 filters.nr_addrs = 0; 1533 1534 for (int i = 0; i < filters.nr_syms; i++) 1535 free(filters.syms[i]); 1536 1537 zfree(&filters.syms); 1538 filters.nr_syms = 0; 1539 1540 zfree(&filters.cgrps); 1541 filters.nr_cgrps = 0; 1542 1543 for (int i = 0; i < filters.nr_slabs; i++) 1544 free(filters.slabs[i]); 1545 1546 zfree(&filters.slabs); 1547 filters.nr_slabs = 0; 1548 } 1549 1550 static void sort_contention_result(void) 1551 { 1552 sort_result(); 1553 } 1554 1555 static void print_header_stdio(void) 1556 { 1557 struct lock_key *key; 1558 1559 list_for_each_entry(key, &lock_keys, list) 1560 fprintf(lock_output, "%*s ", key->len, key->header); 1561 1562 switch (aggr_mode) { 1563 case LOCK_AGGR_TASK: 1564 fprintf(lock_output, " %10s %s\n\n", "pid", 1565 show_lock_owner ? "owner" : "comm"); 1566 break; 1567 case LOCK_AGGR_CALLER: 1568 fprintf(lock_output, " %10s %s\n\n", "type", "caller"); 1569 break; 1570 case LOCK_AGGR_ADDR: 1571 fprintf(lock_output, " %16s %s\n\n", "address", "symbol"); 1572 break; 1573 case LOCK_AGGR_CGROUP: 1574 fprintf(lock_output, " %s\n\n", "cgroup"); 1575 break; 1576 default: 1577 break; 1578 } 1579 } 1580 1581 static void print_header_csv(const char *sep) 1582 { 1583 struct lock_key *key; 1584 1585 fprintf(lock_output, "# output: "); 1586 list_for_each_entry(key, &lock_keys, list) 1587 fprintf(lock_output, "%s%s ", key->header, sep); 1588 1589 switch (aggr_mode) { 1590 case LOCK_AGGR_TASK: 1591 fprintf(lock_output, "%s%s %s\n", "pid", sep, 1592 show_lock_owner ? "owner" : "comm"); 1593 break; 1594 case LOCK_AGGR_CALLER: 1595 fprintf(lock_output, "%s%s %s", "type", sep, "caller"); 1596 if (verbose > 0) 1597 fprintf(lock_output, "%s %s", sep, "stacktrace"); 1598 fprintf(lock_output, "\n"); 1599 break; 1600 case LOCK_AGGR_ADDR: 1601 fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type"); 1602 break; 1603 case LOCK_AGGR_CGROUP: 1604 fprintf(lock_output, "%s\n", "cgroup"); 1605 break; 1606 default: 1607 break; 1608 } 1609 } 1610 1611 static void print_header(void) 1612 { 1613 if (!quiet) { 1614 if (symbol_conf.field_sep) 1615 print_header_csv(symbol_conf.field_sep); 1616 else 1617 print_header_stdio(); 1618 } 1619 } 1620 1621 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st) 1622 { 1623 struct lock_key *key; 1624 struct thread *t; 1625 int pid; 1626 1627 list_for_each_entry(key, &lock_keys, list) { 1628 key->print(key, st); 1629 fprintf(lock_output, " "); 1630 } 1631 1632 switch (aggr_mode) { 1633 case LOCK_AGGR_CALLER: 1634 fprintf(lock_output, " %10s %s\n", get_type_str(st->flags), st->name); 1635 break; 1636 case LOCK_AGGR_TASK: 1637 pid = st->addr; 1638 t = perf_session__findnew(session, pid); 1639 fprintf(lock_output, " %10d %s\n", 1640 pid, pid == -1 ? "Unknown" : thread__comm_str(t)); 1641 break; 1642 case LOCK_AGGR_ADDR: 1643 fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr, 1644 st->name, get_type_name(st->flags)); 1645 break; 1646 case LOCK_AGGR_CGROUP: 1647 fprintf(lock_output, " %s\n", st->name); 1648 break; 1649 default: 1650 break; 1651 } 1652 1653 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1654 struct map *kmap; 1655 struct symbol *sym; 1656 char buf[128]; 1657 u64 ip; 1658 1659 for (int i = 0; i < max_stack_depth; i++) { 1660 if (!st->callstack || !st->callstack[i]) 1661 break; 1662 1663 ip = st->callstack[i]; 1664 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1665 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1666 fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf); 1667 } 1668 } 1669 } 1670 1671 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st, 1672 const char *sep) 1673 { 1674 struct lock_key *key; 1675 struct thread *t; 1676 int pid; 1677 1678 list_for_each_entry(key, &lock_keys, list) { 1679 key->print(key, st); 1680 fprintf(lock_output, "%s ", sep); 1681 } 1682 1683 switch (aggr_mode) { 1684 case LOCK_AGGR_CALLER: 1685 fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name); 1686 if (verbose <= 0) 1687 fprintf(lock_output, "\n"); 1688 break; 1689 case LOCK_AGGR_TASK: 1690 pid = st->addr; 1691 t = perf_session__findnew(session, pid); 1692 fprintf(lock_output, "%d%s %s\n", pid, sep, 1693 pid == -1 ? "Unknown" : thread__comm_str(t)); 1694 break; 1695 case LOCK_AGGR_ADDR: 1696 fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep, 1697 st->name, sep, get_type_name(st->flags)); 1698 break; 1699 case LOCK_AGGR_CGROUP: 1700 fprintf(lock_output, "%s\n",st->name); 1701 break; 1702 default: 1703 break; 1704 } 1705 1706 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { 1707 struct map *kmap; 1708 struct symbol *sym; 1709 char buf[128]; 1710 u64 ip; 1711 1712 for (int i = 0; i < max_stack_depth; i++) { 1713 if (!st->callstack || !st->callstack[i]) 1714 break; 1715 1716 ip = st->callstack[i]; 1717 sym = machine__find_kernel_symbol(con->machine, ip, &kmap); 1718 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); 1719 fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf); 1720 } 1721 fprintf(lock_output, "\n"); 1722 } 1723 } 1724 1725 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st) 1726 { 1727 if (symbol_conf.field_sep) 1728 print_lock_stat_csv(con, st, symbol_conf.field_sep); 1729 else 1730 print_lock_stat_stdio(con, st); 1731 } 1732 1733 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails) 1734 { 1735 /* Output for debug, this have to be removed */ 1736 int broken = fails->task + fails->stack + fails->time + fails->data; 1737 1738 if (!use_bpf) 1739 print_bad_events(bad, total); 1740 1741 if (quiet || total == 0 || (broken == 0 && verbose <= 0)) 1742 return; 1743 1744 total += broken; 1745 fprintf(lock_output, "\n=== output for debug ===\n\n"); 1746 fprintf(lock_output, "bad: %d, total: %d\n", broken, total); 1747 fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total); 1748 1749 fprintf(lock_output, "histogram of failure reasons\n"); 1750 fprintf(lock_output, " %10s: %d\n", "task", fails->task); 1751 fprintf(lock_output, " %10s: %d\n", "stack", fails->stack); 1752 fprintf(lock_output, " %10s: %d\n", "time", fails->time); 1753 fprintf(lock_output, " %10s: %d\n", "data", fails->data); 1754 } 1755 1756 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails, 1757 const char *sep) 1758 { 1759 /* Output for debug, this have to be removed */ 1760 if (use_bpf) 1761 bad = fails->task + fails->stack + fails->time + fails->data; 1762 1763 if (quiet || total == 0 || (bad == 0 && verbose <= 0)) 1764 return; 1765 1766 total += bad; 1767 fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad); 1768 1769 if (use_bpf) { 1770 fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task); 1771 fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack); 1772 fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time); 1773 fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data); 1774 } else { 1775 int i; 1776 const char *name[4] = { "acquire", "acquired", "contended", "release" }; 1777 1778 for (i = 0; i < BROKEN_MAX; i++) 1779 fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]); 1780 } 1781 fprintf(lock_output, "\n"); 1782 } 1783 1784 static void print_footer(int total, int bad, struct lock_contention_fails *fails) 1785 { 1786 if (symbol_conf.field_sep) 1787 print_footer_csv(total, bad, fails, symbol_conf.field_sep); 1788 else 1789 print_footer_stdio(total, bad, fails); 1790 } 1791 1792 static void print_contention_result(struct lock_contention *con) 1793 { 1794 struct lock_stat *st; 1795 int bad, total, printed; 1796 1797 if (!quiet) 1798 print_header(); 1799 1800 bad = total = printed = 0; 1801 1802 while ((st = pop_from_result())) { 1803 total += use_bpf ? st->nr_contended : 1; 1804 if (st->broken) 1805 bad++; 1806 1807 if (!st->wait_time_total) 1808 continue; 1809 1810 print_lock_stat(con, st); 1811 1812 if (++printed >= print_nr_entries) 1813 break; 1814 } 1815 1816 if (print_nr_entries) { 1817 /* update the total/bad stats */ 1818 while ((st = pop_from_result())) { 1819 total += use_bpf ? st->nr_contended : 1; 1820 if (st->broken) 1821 bad++; 1822 } 1823 } 1824 /* some entries are collected but hidden by the callstack filter */ 1825 total += con->nr_filtered; 1826 1827 print_footer(total, bad, &con->fails); 1828 } 1829 1830 static bool force; 1831 1832 static int __cmd_report(bool display_info) 1833 { 1834 int err = -EINVAL; 1835 struct perf_tool eops; 1836 struct perf_data data = { 1837 .path = input_name, 1838 .mode = PERF_DATA_MODE_READ, 1839 .force = force, 1840 }; 1841 1842 perf_tool__init(&eops, /*ordered_events=*/true); 1843 eops.attr = perf_event__process_attr; 1844 eops.event_update = process_event_update; 1845 eops.sample = process_sample_event; 1846 eops.comm = perf_event__process_comm; 1847 eops.mmap = perf_event__process_mmap; 1848 eops.namespaces = perf_event__process_namespaces; 1849 eops.tracing_data = perf_event__process_tracing_data; 1850 session = perf_session__new(&data, &eops); 1851 if (IS_ERR(session)) { 1852 pr_err("Initializing perf session failed\n"); 1853 return PTR_ERR(session); 1854 } 1855 1856 symbol_conf.allow_aliases = true; 1857 symbol__init(&session->header.env); 1858 1859 if (!data.is_pipe) { 1860 if (!perf_session__has_traces(session, "lock record")) 1861 goto out_delete; 1862 1863 if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { 1864 pr_err("Initializing perf session tracepoint handlers failed\n"); 1865 goto out_delete; 1866 } 1867 1868 if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { 1869 pr_err("Initializing perf session tracepoint handlers failed\n"); 1870 goto out_delete; 1871 } 1872 } 1873 1874 if (setup_output_field(false, output_fields)) 1875 goto out_delete; 1876 1877 if (select_key(false)) 1878 goto out_delete; 1879 1880 if (show_thread_stats) 1881 aggr_mode = LOCK_AGGR_TASK; 1882 1883 err = perf_session__process_events(session); 1884 if (err) 1885 goto out_delete; 1886 1887 setup_pager(); 1888 if (display_info) /* used for info subcommand */ 1889 dump_info(); 1890 else { 1891 combine_result(); 1892 sort_result(); 1893 print_result(); 1894 } 1895 1896 out_delete: 1897 perf_session__delete(session); 1898 return err; 1899 } 1900 1901 static void sighandler(int sig __maybe_unused) 1902 { 1903 } 1904 1905 static int check_lock_contention_options(const struct option *options, 1906 const char * const *usage) 1907 1908 { 1909 if (show_thread_stats && show_lock_addrs) { 1910 pr_err("Cannot use thread and addr mode together\n"); 1911 parse_options_usage(usage, options, "threads", 0); 1912 parse_options_usage(NULL, options, "lock-addr", 0); 1913 return -1; 1914 } 1915 1916 if (show_lock_owner && !use_bpf) { 1917 pr_err("Lock owners are available only with BPF\n"); 1918 parse_options_usage(usage, options, "lock-owner", 0); 1919 parse_options_usage(NULL, options, "use-bpf", 0); 1920 return -1; 1921 } 1922 1923 if (show_lock_owner && show_lock_addrs) { 1924 pr_err("Cannot use owner and addr mode together\n"); 1925 parse_options_usage(usage, options, "lock-owner", 0); 1926 parse_options_usage(NULL, options, "lock-addr", 0); 1927 return -1; 1928 } 1929 1930 if (show_lock_cgroups && !use_bpf) { 1931 pr_err("Cgroups are available only with BPF\n"); 1932 parse_options_usage(usage, options, "lock-cgroup", 0); 1933 parse_options_usage(NULL, options, "use-bpf", 0); 1934 return -1; 1935 } 1936 1937 if (show_lock_cgroups && show_lock_addrs) { 1938 pr_err("Cannot use cgroup and addr mode together\n"); 1939 parse_options_usage(usage, options, "lock-cgroup", 0); 1940 parse_options_usage(NULL, options, "lock-addr", 0); 1941 return -1; 1942 } 1943 1944 if (show_lock_cgroups && show_thread_stats) { 1945 pr_err("Cannot use cgroup and thread mode together\n"); 1946 parse_options_usage(usage, options, "lock-cgroup", 0); 1947 parse_options_usage(NULL, options, "threads", 0); 1948 return -1; 1949 } 1950 1951 if (symbol_conf.field_sep) { 1952 if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */ 1953 strstr(symbol_conf.field_sep, "+") || /* part of caller offset */ 1954 strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */ 1955 pr_err("Cannot use the separator that is already used\n"); 1956 parse_options_usage(usage, options, "x", 1); 1957 return -1; 1958 } 1959 } 1960 1961 if (show_lock_owner) 1962 show_thread_stats = true; 1963 1964 return 0; 1965 } 1966 1967 static int __cmd_contention(int argc, const char **argv) 1968 { 1969 int err = -EINVAL; 1970 struct perf_tool eops; 1971 struct perf_data data = { 1972 .path = input_name, 1973 .mode = PERF_DATA_MODE_READ, 1974 .force = force, 1975 }; 1976 struct lock_contention con = { 1977 .target = &target, 1978 .map_nr_entries = bpf_map_entries, 1979 .max_stack = max_stack_depth, 1980 .stack_skip = stack_skip, 1981 .filters = &filters, 1982 .save_callstack = needs_callstack(), 1983 .owner = show_lock_owner, 1984 .cgroups = RB_ROOT, 1985 }; 1986 1987 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table)); 1988 if (!lockhash_table) 1989 return -ENOMEM; 1990 1991 con.result = &lockhash_table[0]; 1992 1993 perf_tool__init(&eops, /*ordered_events=*/true); 1994 eops.attr = perf_event__process_attr; 1995 eops.event_update = process_event_update; 1996 eops.sample = process_sample_event; 1997 eops.comm = perf_event__process_comm; 1998 eops.mmap = perf_event__process_mmap; 1999 eops.tracing_data = perf_event__process_tracing_data; 2000 2001 session = perf_session__new(use_bpf ? NULL : &data, &eops); 2002 if (IS_ERR(session)) { 2003 pr_err("Initializing perf session failed\n"); 2004 err = PTR_ERR(session); 2005 session = NULL; 2006 goto out_delete; 2007 } 2008 2009 con.machine = &session->machines.host; 2010 2011 con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : 2012 show_lock_addrs ? LOCK_AGGR_ADDR : 2013 show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER; 2014 2015 if (con.aggr_mode == LOCK_AGGR_CALLER) 2016 con.save_callstack = true; 2017 2018 symbol_conf.allow_aliases = true; 2019 symbol__init(&session->header.env); 2020 2021 if (use_bpf) { 2022 err = target__validate(&target); 2023 if (err) { 2024 char errbuf[512]; 2025 2026 target__strerror(&target, err, errbuf, 512); 2027 pr_err("%s\n", errbuf); 2028 goto out_delete; 2029 } 2030 2031 signal(SIGINT, sighandler); 2032 signal(SIGCHLD, sighandler); 2033 signal(SIGTERM, sighandler); 2034 2035 con.evlist = evlist__new(); 2036 if (con.evlist == NULL) { 2037 err = -ENOMEM; 2038 goto out_delete; 2039 } 2040 2041 err = evlist__create_maps(con.evlist, &target); 2042 if (err < 0) 2043 goto out_delete; 2044 2045 if (argc) { 2046 err = evlist__prepare_workload(con.evlist, &target, 2047 argv, false, NULL); 2048 if (err < 0) 2049 goto out_delete; 2050 } 2051 2052 if (lock_contention_prepare(&con) < 0) { 2053 pr_err("lock contention BPF setup failed\n"); 2054 goto out_delete; 2055 } 2056 } else if (!data.is_pipe) { 2057 if (!perf_session__has_traces(session, "lock record")) 2058 goto out_delete; 2059 2060 if (!evlist__find_evsel_by_str(session->evlist, 2061 "lock:contention_begin")) { 2062 pr_err("lock contention evsel not found\n"); 2063 goto out_delete; 2064 } 2065 2066 if (perf_session__set_tracepoints_handlers(session, 2067 contention_tracepoints)) { 2068 pr_err("Initializing perf session tracepoint handlers failed\n"); 2069 goto out_delete; 2070 } 2071 } 2072 2073 if (setup_output_field(true, output_fields)) 2074 goto out_delete; 2075 2076 if (select_key(true)) 2077 goto out_delete; 2078 2079 if (symbol_conf.field_sep) { 2080 int i; 2081 struct lock_key *keys = contention_keys; 2082 2083 /* do not align output in CSV format */ 2084 for (i = 0; keys[i].name; i++) 2085 keys[i].len = 0; 2086 } 2087 2088 if (use_bpf) { 2089 lock_contention_start(); 2090 if (argc) 2091 evlist__start_workload(con.evlist); 2092 2093 /* wait for signal */ 2094 pause(); 2095 2096 lock_contention_stop(); 2097 lock_contention_read(&con); 2098 } else { 2099 err = perf_session__process_events(session); 2100 if (err) 2101 goto out_delete; 2102 } 2103 2104 setup_pager(); 2105 2106 sort_contention_result(); 2107 print_contention_result(&con); 2108 2109 out_delete: 2110 lock_filter_finish(); 2111 evlist__delete(con.evlist); 2112 lock_contention_finish(&con); 2113 perf_session__delete(session); 2114 zfree(&lockhash_table); 2115 return err; 2116 } 2117 2118 2119 static int __cmd_record(int argc, const char **argv) 2120 { 2121 const char *record_args[] = { 2122 "record", "-R", "-m", "1024", "-c", "1", "--synth", "task", 2123 }; 2124 const char *callgraph_args[] = { 2125 "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH), 2126 }; 2127 unsigned int rec_argc, i, j, ret; 2128 unsigned int nr_tracepoints; 2129 unsigned int nr_callgraph_args = 0; 2130 const char **rec_argv; 2131 bool has_lock_stat = true; 2132 2133 for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { 2134 if (!is_valid_tracepoint(lock_tracepoints[i].name)) { 2135 pr_debug("tracepoint %s is not enabled. " 2136 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", 2137 lock_tracepoints[i].name); 2138 has_lock_stat = false; 2139 break; 2140 } 2141 } 2142 2143 if (has_lock_stat) 2144 goto setup_args; 2145 2146 for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) { 2147 if (!is_valid_tracepoint(contention_tracepoints[i].name)) { 2148 pr_err("tracepoint %s is not enabled.\n", 2149 contention_tracepoints[i].name); 2150 return 1; 2151 } 2152 } 2153 2154 nr_callgraph_args = ARRAY_SIZE(callgraph_args); 2155 2156 setup_args: 2157 rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1; 2158 2159 if (has_lock_stat) 2160 nr_tracepoints = ARRAY_SIZE(lock_tracepoints); 2161 else 2162 nr_tracepoints = ARRAY_SIZE(contention_tracepoints); 2163 2164 /* factor of 2 is for -e in front of each tracepoint */ 2165 rec_argc += 2 * nr_tracepoints; 2166 2167 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2168 if (!rec_argv) 2169 return -ENOMEM; 2170 2171 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2172 rec_argv[i] = record_args[i]; 2173 2174 for (j = 0; j < nr_tracepoints; j++) { 2175 rec_argv[i++] = "-e"; 2176 rec_argv[i++] = has_lock_stat 2177 ? lock_tracepoints[j].name 2178 : contention_tracepoints[j].name; 2179 } 2180 2181 for (j = 0; j < nr_callgraph_args; j++, i++) 2182 rec_argv[i] = callgraph_args[j]; 2183 2184 for (j = 1; j < (unsigned int)argc; j++, i++) 2185 rec_argv[i] = argv[j]; 2186 2187 BUG_ON(i != rec_argc); 2188 2189 ret = cmd_record(i, rec_argv); 2190 free(rec_argv); 2191 return ret; 2192 } 2193 2194 static int parse_map_entry(const struct option *opt, const char *str, 2195 int unset __maybe_unused) 2196 { 2197 unsigned long *len = (unsigned long *)opt->value; 2198 unsigned long val; 2199 char *endptr; 2200 2201 errno = 0; 2202 val = strtoul(str, &endptr, 0); 2203 if (*endptr != '\0' || errno != 0) { 2204 pr_err("invalid BPF map length: %s\n", str); 2205 return -1; 2206 } 2207 2208 *len = val; 2209 return 0; 2210 } 2211 2212 static int parse_max_stack(const struct option *opt, const char *str, 2213 int unset __maybe_unused) 2214 { 2215 unsigned long *len = (unsigned long *)opt->value; 2216 long val; 2217 char *endptr; 2218 2219 errno = 0; 2220 val = strtol(str, &endptr, 0); 2221 if (*endptr != '\0' || errno != 0) { 2222 pr_err("invalid max stack depth: %s\n", str); 2223 return -1; 2224 } 2225 2226 if (val < 0 || val > sysctl__max_stack()) { 2227 pr_err("invalid max stack depth: %ld\n", val); 2228 return -1; 2229 } 2230 2231 *len = val; 2232 return 0; 2233 } 2234 2235 static bool add_lock_type(unsigned int flags) 2236 { 2237 unsigned int *tmp; 2238 2239 tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types)); 2240 if (tmp == NULL) 2241 return false; 2242 2243 tmp[filters.nr_types++] = flags; 2244 filters.types = tmp; 2245 return true; 2246 } 2247 2248 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str, 2249 int unset __maybe_unused) 2250 { 2251 char *s, *tmp, *tok; 2252 int ret = 0; 2253 2254 s = strdup(str); 2255 if (s == NULL) 2256 return -1; 2257 2258 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2259 unsigned int flags = get_type_flag(tok); 2260 2261 if (flags == -1U) { 2262 pr_err("Unknown lock flags: %s\n", tok); 2263 ret = -1; 2264 break; 2265 } 2266 2267 if (!add_lock_type(flags)) { 2268 ret = -1; 2269 break; 2270 } 2271 } 2272 2273 free(s); 2274 return ret; 2275 } 2276 2277 static bool add_lock_addr(unsigned long addr) 2278 { 2279 unsigned long *tmp; 2280 2281 tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs)); 2282 if (tmp == NULL) { 2283 pr_err("Memory allocation failure\n"); 2284 return false; 2285 } 2286 2287 tmp[filters.nr_addrs++] = addr; 2288 filters.addrs = tmp; 2289 return true; 2290 } 2291 2292 static bool add_lock_sym(char *name) 2293 { 2294 char **tmp; 2295 char *sym = strdup(name); 2296 2297 if (sym == NULL) { 2298 pr_err("Memory allocation failure\n"); 2299 return false; 2300 } 2301 2302 tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms)); 2303 if (tmp == NULL) { 2304 pr_err("Memory allocation failure\n"); 2305 free(sym); 2306 return false; 2307 } 2308 2309 tmp[filters.nr_syms++] = sym; 2310 filters.syms = tmp; 2311 return true; 2312 } 2313 2314 static bool add_lock_slab(char *name) 2315 { 2316 char **tmp; 2317 char *sym = strdup(name); 2318 2319 if (sym == NULL) { 2320 pr_err("Memory allocation failure\n"); 2321 return false; 2322 } 2323 2324 tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs)); 2325 if (tmp == NULL) { 2326 pr_err("Memory allocation failure\n"); 2327 return false; 2328 } 2329 2330 tmp[filters.nr_slabs++] = sym; 2331 filters.slabs = tmp; 2332 return true; 2333 } 2334 2335 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str, 2336 int unset __maybe_unused) 2337 { 2338 char *s, *tmp, *tok; 2339 int ret = 0; 2340 u64 addr; 2341 2342 s = strdup(str); 2343 if (s == NULL) 2344 return -1; 2345 2346 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2347 char *end; 2348 2349 addr = strtoul(tok, &end, 16); 2350 if (*end == '\0') { 2351 if (!add_lock_addr(addr)) { 2352 ret = -1; 2353 break; 2354 } 2355 continue; 2356 } 2357 2358 if (*tok == '&') { 2359 if (!add_lock_slab(tok + 1)) { 2360 ret = -1; 2361 break; 2362 } 2363 continue; 2364 } 2365 2366 /* 2367 * At this moment, we don't have kernel symbols. Save the symbols 2368 * in a separate list and resolve them to addresses later. 2369 */ 2370 if (!add_lock_sym(tok)) { 2371 ret = -1; 2372 break; 2373 } 2374 } 2375 2376 free(s); 2377 return ret; 2378 } 2379 2380 static int parse_output(const struct option *opt __maybe_unused, const char *str, 2381 int unset __maybe_unused) 2382 { 2383 const char **name = (const char **)opt->value; 2384 2385 if (str == NULL) 2386 return -1; 2387 2388 lock_output = fopen(str, "w"); 2389 if (lock_output == NULL) { 2390 pr_err("Cannot open %s\n", str); 2391 return -1; 2392 } 2393 2394 *name = str; 2395 return 0; 2396 } 2397 2398 static bool add_lock_cgroup(char *name) 2399 { 2400 u64 *tmp; 2401 struct cgroup *cgrp; 2402 2403 cgrp = cgroup__new(name, /*do_open=*/false); 2404 if (cgrp == NULL) { 2405 pr_err("Failed to create cgroup: %s\n", name); 2406 return false; 2407 } 2408 2409 if (read_cgroup_id(cgrp) < 0) { 2410 pr_err("Failed to read cgroup id for %s\n", name); 2411 cgroup__put(cgrp); 2412 return false; 2413 } 2414 2415 tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps)); 2416 if (tmp == NULL) { 2417 pr_err("Memory allocation failure\n"); 2418 return false; 2419 } 2420 2421 tmp[filters.nr_cgrps++] = cgrp->id; 2422 filters.cgrps = tmp; 2423 cgroup__put(cgrp); 2424 return true; 2425 } 2426 2427 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str, 2428 int unset __maybe_unused) 2429 { 2430 char *s, *tmp, *tok; 2431 int ret = 0; 2432 2433 s = strdup(str); 2434 if (s == NULL) 2435 return -1; 2436 2437 for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { 2438 if (!add_lock_cgroup(tok)) { 2439 ret = -1; 2440 break; 2441 } 2442 } 2443 2444 free(s); 2445 return ret; 2446 } 2447 2448 int cmd_lock(int argc, const char **argv) 2449 { 2450 const struct option lock_options[] = { 2451 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2452 OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output), 2453 OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), 2454 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 2455 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 2456 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2457 "file", "vmlinux pathname"), 2458 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 2459 "file", "kallsyms pathname"), 2460 OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), 2461 OPT_END() 2462 }; 2463 2464 const struct option info_options[] = { 2465 OPT_BOOLEAN('t', "threads", &info_threads, 2466 "dump the thread list in perf.data"), 2467 OPT_BOOLEAN('m', "map", &info_map, 2468 "dump the map of lock instances (address:name table)"), 2469 OPT_PARENT(lock_options) 2470 }; 2471 2472 const struct option report_options[] = { 2473 OPT_STRING('k', "key", &sort_key, "acquired", 2474 "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2475 OPT_STRING('F', "field", &output_fields, NULL, 2476 "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 2477 /* TODO: type */ 2478 OPT_BOOLEAN('c', "combine-locks", &combine_locks, 2479 "combine locks in the same class"), 2480 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2481 "show per-thread lock stats"), 2482 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2483 OPT_PARENT(lock_options) 2484 }; 2485 2486 struct option contention_options[] = { 2487 OPT_STRING('k', "key", &sort_key, "wait_total", 2488 "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"), 2489 OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait", 2490 "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"), 2491 OPT_BOOLEAN('t', "threads", &show_thread_stats, 2492 "show per-thread lock stats"), 2493 OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"), 2494 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 2495 "System-wide collection from all CPUs"), 2496 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 2497 "List of cpus to monitor"), 2498 OPT_STRING('p', "pid", &target.pid, "pid", 2499 "Trace on existing process id"), 2500 OPT_STRING(0, "tid", &target.tid, "tid", 2501 "Trace on existing thread id (exclusive to --pid)"), 2502 OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num", 2503 "Max number of BPF map entries", parse_map_entry), 2504 OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", 2505 "Set the maximum stack depth when collecting lock contention, " 2506 "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), 2507 OPT_INTEGER(0, "stack-skip", &stack_skip, 2508 "Set the number of stack depth to skip when finding a lock caller, " 2509 "Default: " __stringify(CONTENTION_STACK_SKIP)), 2510 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 2511 OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"), 2512 OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS", 2513 "Filter specific type of locks", parse_lock_type), 2514 OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES", 2515 "Filter specific address/symbol of locks", parse_lock_addr), 2516 OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", 2517 "Filter specific function in the callstack", parse_call_stack), 2518 OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"), 2519 OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", 2520 "print result in CSV format with custom separator"), 2521 OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"), 2522 OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS", 2523 "Filter specific cgroups", parse_cgroup_filter), 2524 OPT_PARENT(lock_options) 2525 }; 2526 2527 const char * const info_usage[] = { 2528 "perf lock info [<options>]", 2529 NULL 2530 }; 2531 const char *const lock_subcommands[] = { "record", "report", "script", 2532 "info", "contention", NULL }; 2533 const char *lock_usage[] = { 2534 NULL, 2535 NULL 2536 }; 2537 const char * const report_usage[] = { 2538 "perf lock report [<options>]", 2539 NULL 2540 }; 2541 const char * const contention_usage[] = { 2542 "perf lock contention [<options>]", 2543 NULL 2544 }; 2545 unsigned int i; 2546 int rc = 0; 2547 2548 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table)); 2549 if (!lockhash_table) 2550 return -ENOMEM; 2551 2552 for (i = 0; i < LOCKHASH_SIZE; i++) 2553 INIT_HLIST_HEAD(lockhash_table + i); 2554 2555 lock_output = stderr; 2556 argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, 2557 lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2558 if (!argc) 2559 usage_with_options(lock_usage, lock_options); 2560 2561 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 2562 return __cmd_record(argc, argv); 2563 } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 2564 trace_handler = &report_lock_ops; 2565 if (argc) { 2566 argc = parse_options(argc, argv, 2567 report_options, report_usage, 0); 2568 if (argc) 2569 usage_with_options(report_usage, report_options); 2570 } 2571 rc = __cmd_report(false); 2572 } else if (!strcmp(argv[0], "script")) { 2573 /* Aliased to 'perf script' */ 2574 rc = cmd_script(argc, argv); 2575 } else if (!strcmp(argv[0], "info")) { 2576 if (argc) { 2577 argc = parse_options(argc, argv, 2578 info_options, info_usage, 0); 2579 if (argc) 2580 usage_with_options(info_usage, info_options); 2581 } 2582 2583 /* If neither threads nor map requested, display both */ 2584 if (!info_threads && !info_map) { 2585 info_threads = true; 2586 info_map = true; 2587 } 2588 2589 /* recycling report_lock_ops */ 2590 trace_handler = &report_lock_ops; 2591 rc = __cmd_report(true); 2592 } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) { 2593 trace_handler = &contention_lock_ops; 2594 sort_key = "wait_total"; 2595 output_fields = "contended,wait_total,wait_max,avg_wait"; 2596 2597 #ifndef HAVE_BPF_SKEL 2598 set_option_nobuild(contention_options, 'b', "use-bpf", 2599 "no BUILD_BPF_SKEL=1", false); 2600 #endif 2601 if (argc) { 2602 argc = parse_options(argc, argv, contention_options, 2603 contention_usage, 0); 2604 } 2605 2606 if (check_lock_contention_options(contention_options, 2607 contention_usage) < 0) 2608 return -1; 2609 2610 rc = __cmd_contention(argc, argv); 2611 } else { 2612 usage_with_options(lock_usage, lock_options); 2613 } 2614 2615 /* free usage string allocated by parse_options_subcommand */ 2616 free((void *)lock_usage[0]); 2617 2618 zfree(&lockhash_table); 2619 return rc; 2620 } 2621