1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/cgroup.h"
14 #include "util/callchain.h"
15 #include "util/lock-contention.h"
16 #include "util/bpf_skel/lock_data.h"
17
18 #include <subcmd/pager.h>
19 #include <subcmd/parse-options.h>
20 #include "util/trace-event.h"
21 #include "util/tracepoint.h"
22
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/data.h"
27 #include "util/string2.h"
28 #include "util/map.h"
29 #include "util/util.h"
30
31 #include <stdio.h>
32 #include <sys/types.h>
33 #include <sys/prctl.h>
34 #include <semaphore.h>
35 #include <math.h>
36 #include <limits.h>
37 #include <ctype.h>
38
39 #include <linux/list.h>
40 #include <linux/hash.h>
41 #include <linux/kernel.h>
42 #include <linux/zalloc.h>
43 #include <linux/err.h>
44 #include <linux/stringify.h>
45
46 static struct perf_session *session;
47 static struct target target;
48
49 static struct rb_root thread_stats;
50
51 static bool combine_locks;
52 static bool show_thread_stats;
53 static bool show_lock_addrs;
54 static bool show_lock_owner;
55 static bool show_lock_cgroups;
56 static bool use_bpf;
57 static unsigned long bpf_map_entries = MAX_ENTRIES;
58 static int max_stack_depth = CONTENTION_STACK_DEPTH;
59 static int stack_skip = CONTENTION_STACK_SKIP;
60 static int print_nr_entries = INT_MAX / 2;
61 static const char *output_name = NULL;
62 static FILE *lock_output;
63
64 static struct lock_filter filters;
65 static struct lock_delay *delays;
66 static int nr_delays;
67
68 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
69
70 static struct thread_stat *thread_stat_find(u32 tid)
71 {
72 struct rb_node *node;
73 struct thread_stat *st;
74
75 node = thread_stats.rb_node;
76 while (node) {
77 st = container_of(node, struct thread_stat, rb);
78 if (st->tid == tid)
79 return st;
80 else if (tid < st->tid)
81 node = node->rb_left;
82 else
83 node = node->rb_right;
84 }
85
86 return NULL;
87 }
88
89 static void thread_stat_insert(struct thread_stat *new)
90 {
91 struct rb_node **rb = &thread_stats.rb_node;
92 struct rb_node *parent = NULL;
93 struct thread_stat *p;
94
95 while (*rb) {
96 p = container_of(*rb, struct thread_stat, rb);
97 parent = *rb;
98
99 if (new->tid < p->tid)
100 rb = &(*rb)->rb_left;
101 else if (new->tid > p->tid)
102 rb = &(*rb)->rb_right;
103 else
104 BUG_ON("inserting invalid thread_stat\n");
105 }
106
107 rb_link_node(&new->rb, parent, rb);
108 rb_insert_color(&new->rb, &thread_stats);
109 }
110
111 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
112 {
113 struct thread_stat *st;
114
115 st = thread_stat_find(tid);
116 if (st)
117 return st;
118
119 st = zalloc(sizeof(struct thread_stat));
120 if (!st) {
121 pr_err("memory allocation failed\n");
122 return NULL;
123 }
124
125 st->tid = tid;
126 INIT_LIST_HEAD(&st->seq_list);
127
128 thread_stat_insert(st);
129
130 return st;
131 }
132
133 static struct thread_stat *thread_stat_findnew_first(u32 tid);
134 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
135 thread_stat_findnew_first;
136
137 static struct thread_stat *thread_stat_findnew_first(u32 tid)
138 {
139 struct thread_stat *st;
140
141 st = zalloc(sizeof(struct thread_stat));
142 if (!st) {
143 pr_err("memory allocation failed\n");
144 return NULL;
145 }
146 st->tid = tid;
147 INIT_LIST_HEAD(&st->seq_list);
148
149 rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
150 rb_insert_color(&st->rb, &thread_stats);
151
152 thread_stat_findnew = thread_stat_findnew_after_first;
153 return st;
154 }
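/*
 * A note on the dispatch above: thread_stat_findnew is a function
 * pointer that starts out as thread_stat_findnew_first.  The very
 * first call links its node directly as the rb-tree root and then
 * repoints the pointer to thread_stat_findnew_after_first, so the
 * empty-tree special case is handled exactly once.
 */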
155
156 /* build simple key comparison functions: return whether 'one' is bigger than 'two' */
157 #define SINGLE_KEY(member) \
158 static int lock_stat_key_ ## member(struct lock_stat *one, \
159 struct lock_stat *two) \
160 { \
161 return one->member > two->member; \
162 }
163
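/*
 * For instance, SINGLE_KEY(nr_acquired) below expands to the
 * comparator:
 *
 *   static int lock_stat_key_nr_acquired(struct lock_stat *one,
 *                                        struct lock_stat *two)
 *   {
 *           return one->nr_acquired > two->nr_acquired;
 *   }
 */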
164 SINGLE_KEY(nr_acquired)
165 SINGLE_KEY(nr_contended)
166 SINGLE_KEY(avg_wait_time)
167 SINGLE_KEY(wait_time_total)
168 SINGLE_KEY(wait_time_max)
169
170 static int lock_stat_key_wait_time_min(struct lock_stat *one,
171 struct lock_stat *two)
172 {
173 u64 s1 = one->wait_time_min;
174 u64 s2 = two->wait_time_min;
175 if (s1 == ULLONG_MAX)
176 s1 = 0;
177 if (s2 == ULLONG_MAX)
178 s2 = 0;
179 return s1 > s2;
180 }
181
182 struct lock_key {
183 /*
184 * name: the key name as specified by the user;
185 * this should be simpler than the raw name of the member,
186 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
187 */
188 const char *name;
189 /* header: the string printed on the header line */
190 const char *header;
191 /* len: the printing width of the field */
192 int len;
193 /* key: a pointer to a function that compares two lock stats for sorting */
194 int (*key)(struct lock_stat*, struct lock_stat*);
195 /* print: a pointer to a function that prints a given lock stat */
196 void (*print)(struct lock_key*, struct lock_stat*);
197 /* list: list entry to link this */
198 struct list_head list;
199 };
200
201 static void lock_stat_key_print_time(unsigned long long nsec, int len)
202 {
203 static const struct {
204 float base;
205 const char *unit;
206 } table[] = {
207 { 1e9 * 3600, "h " },
208 { 1e9 * 60, "m " },
209 { 1e9, "s " },
210 { 1e6, "ms" },
211 { 1e3, "us" },
212 { 0, NULL },
213 };
214
215 /* for CSV output */
216 if (len == 0) {
217 fprintf(lock_output, "%llu", nsec);
218 return;
219 }
220
221 for (int i = 0; table[i].unit; i++) {
222 if (nsec < table[i].base)
223 continue;
224
225 fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
226 return;
227 }
228
229 fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
230 }
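/*
 * Worked examples for the scaling above, with the default width
 * (len == 12, so the number itself gets len - 3 == 9 columns):
 *
 *   lock_stat_key_print_time(1234567, 12) -> "     1.23 ms"
 *   lock_stat_key_print_time(500, 12)     -> "      500 ns"
 *   lock_stat_key_print_time(1234567, 0)  -> "1234567"      (CSV, raw ns)
 */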
231
232 #define PRINT_KEY(member) \
233 static void lock_stat_key_print_ ## member(struct lock_key *key, \
234 struct lock_stat *ls) \
235 { \
236 fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
237 }
238
239 #define PRINT_TIME(member) \
240 static void lock_stat_key_print_ ## member(struct lock_key *key, \
241 struct lock_stat *ls) \
242 { \
243 lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
244 }
245
246 PRINT_KEY(nr_acquired)
247 PRINT_KEY(nr_contended)
248 PRINT_TIME(avg_wait_time)
249 PRINT_TIME(wait_time_total)
250 PRINT_TIME(wait_time_max)
251
252 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
253 struct lock_stat *ls)
254 {
255 u64 wait_time = ls->wait_time_min;
256
257 if (wait_time == ULLONG_MAX)
258 wait_time = 0;
259
260 lock_stat_key_print_time(wait_time, key->len);
261 }
262
263
264 static const char *sort_key = "acquired";
265
266 static int (*compare)(struct lock_stat *, struct lock_stat *);
267
268 static struct rb_root sorted; /* place to store intermediate data */
269 static struct rb_root result; /* place to store sorted data */
270
271 static LIST_HEAD(lock_keys);
272 static const char *output_fields;
273
274 #define DEF_KEY_LOCK(name, header, fn_suffix, len) \
275 { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
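/*
 * For example, DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10)
 * expands to the initializer:
 *
 *   { "acquired", "acquired", 10, lock_stat_key_nr_acquired,
 *     lock_stat_key_print_nr_acquired, {} }
 *
 * tying a user-visible key name to its comparator and print function.
 */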
276 static struct lock_key report_keys[] = {
277 DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
278 DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
279 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
280 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
281 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
282 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
283
284 /* extra, more complicated comparisons should go here */
285 { }
286 };
287
288 static struct lock_key contention_keys[] = {
289 DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
290 DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
291 DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
292 DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
293 DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
294
295 /* extra, more complicated comparisons should go here */
296 { }
297 };
298
299 static int select_key(bool contention)
300 {
301 int i;
302 struct lock_key *keys = report_keys;
303
304 if (contention)
305 keys = contention_keys;
306
307 for (i = 0; keys[i].name; i++) {
308 if (!strcmp(keys[i].name, sort_key)) {
309 compare = keys[i].key;
310
311 /* selected key should be in the output fields */
312 if (list_empty(&keys[i].list))
313 list_add_tail(&keys[i].list, &lock_keys);
314
315 return 0;
316 }
317 }
318
319 pr_err("Unknown compare key: %s\n", sort_key);
320 return -1;
321 }
322
323 static int add_output_field(bool contention, char *name)
324 {
325 int i;
326 struct lock_key *keys = report_keys;
327
328 if (contention)
329 keys = contention_keys;
330
331 for (i = 0; keys[i].name; i++) {
332 if (strcmp(keys[i].name, name))
333 continue;
334
335 /* prevent double link */
336 if (list_empty(&keys[i].list))
337 list_add_tail(&keys[i].list, &lock_keys);
338
339 return 0;
340 }
341
342 pr_err("Unknown output field: %s\n", name);
343 return -1;
344 }
345
346 static int setup_output_field(bool contention, const char *str)
347 {
348 char *tok, *tmp, *orig;
349 int i, ret = 0;
350 struct lock_key *keys = report_keys;
351
352 if (contention)
353 keys = contention_keys;
354
355 /* no output field given: use all of them */
356 if (str == NULL) {
357 for (i = 0; keys[i].name; i++)
358 list_add_tail(&keys[i].list, &lock_keys);
359 return 0;
360 }
361
362 for (i = 0; keys[i].name; i++)
363 INIT_LIST_HEAD(&keys[i].list);
364
365 orig = tmp = strdup(str);
366 if (orig == NULL)
367 return -ENOMEM;
368
369 while ((tok = strsep(&tmp, ",")) != NULL) {
370 ret = add_output_field(contention, tok);
371 if (ret < 0)
372 break;
373 }
374 free(orig);
375
376 return ret;
377 }
378
379 static void combine_lock_stats(struct lock_stat *st)
380 {
381 struct rb_node **rb = &sorted.rb_node;
382 struct rb_node *parent = NULL;
383 struct lock_stat *p;
384 int ret;
385
386 while (*rb) {
387 p = container_of(*rb, struct lock_stat, rb);
388 parent = *rb;
389
390 if (st->name && p->name)
391 ret = strcmp(st->name, p->name);
392 else
393 ret = !!st->name - !!p->name;
394
395 if (ret == 0) {
396 p->nr_acquired += st->nr_acquired;
397 p->nr_contended += st->nr_contended;
398 p->wait_time_total += st->wait_time_total;
399
400 if (p->nr_contended)
401 p->avg_wait_time = p->wait_time_total / p->nr_contended;
402
403 if (p->wait_time_min > st->wait_time_min)
404 p->wait_time_min = st->wait_time_min;
405 if (p->wait_time_max < st->wait_time_max)
406 p->wait_time_max = st->wait_time_max;
407
408 p->broken |= st->broken;
409 st->combined = 1;
410 return;
411 }
412
413 if (ret < 0)
414 rb = &(*rb)->rb_left;
415 else
416 rb = &(*rb)->rb_right;
417 }
418
419 rb_link_node(&st->rb, parent, rb);
420 rb_insert_color(&st->rb, &sorted);
421 }
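/*
 * Merge example: combining st = { nr_contended: 3, wait_time_total: 300 }
 * into p = { nr_contended: 2, wait_time_total: 100 } yields
 * p = { nr_contended: 5, wait_time_total: 400, avg_wait_time: 80 }
 * with min/max widened accordingly; st->combined is set so that
 * insert_to_result() skips the absorbed entry later.
 */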
422
423 static void insert_to(struct rb_root *rr, struct lock_stat *st,
424 int (*bigger)(struct lock_stat *, struct lock_stat *))
425 {
426 struct rb_node **rb = &rr->rb_node;
427 struct rb_node *parent = NULL;
428 struct lock_stat *p;
429
430 while (*rb) {
431 p = container_of(*rb, struct lock_stat, rb);
432 parent = *rb;
433
434 if (bigger(st, p))
435 rb = &(*rb)->rb_left;
436 else
437 rb = &(*rb)->rb_right;
438 }
439
440 rb_link_node(&st->rb, parent, rb);
441 rb_insert_color(&st->rb, rr);
442 }
443
444 static inline void insert_to_result(struct lock_stat *st,
445 int (*bigger)(struct lock_stat *,
446 struct lock_stat *))
447 {
448 if (combine_locks && st->combined)
449 return;
450 insert_to(&result, st, bigger);
451 }
452
453 static inline struct lock_stat *pop_from(struct rb_root *rr)
454 {
455 struct rb_node *node = rr->rb_node;
456
457 if (!node)
458 return NULL;
459
460 while (node->rb_left)
461 node = node->rb_left;
462
463 rb_erase(node, rr);
464 return container_of(node, struct lock_stat, rb);
466 }
467
468 /* returns the leftmost element of result, and erases it */
469 static struct lock_stat *pop_from_result(void)
470 {
471 return pop_from(&result);
472 }
473
474 struct trace_lock_handler {
475 /* used when CONFIG_LOCKDEP is enabled */
476 int (*acquire_event)(struct evsel *evsel,
477 struct perf_sample *sample);
478
479 /* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
480 int (*acquired_event)(struct evsel *evsel,
481 struct perf_sample *sample);
482
483 /* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
484 int (*contended_event)(struct evsel *evsel,
485 struct perf_sample *sample);
486
487 /* used when CONFIG_LOCKDEP is enabled */
488 int (*release_event)(struct evsel *evsel,
489 struct perf_sample *sample);
490
491 /* used when CONFIG_LOCKDEP is off */
492 int (*contention_begin_event)(struct evsel *evsel,
493 struct perf_sample *sample);
494
495 /* used when CONFIG_LOCKDEP is off */
496 int (*contention_end_event)(struct evsel *evsel,
497 struct perf_sample *sample);
498 };
499
500 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
501 {
502 struct lock_seq_stat *seq;
503
504 list_for_each_entry(seq, &ts->seq_list, list) {
505 if (seq->addr == addr)
506 return seq;
507 }
508
509 seq = zalloc(sizeof(struct lock_seq_stat));
510 if (!seq) {
511 pr_err("memory allocation failed\n");
512 return NULL;
513 }
514 seq->state = SEQ_STATE_UNINITIALIZED;
515 seq->addr = addr;
516
517 list_add(&seq->list, &ts->seq_list);
518 return seq;
519 }
520
521 enum broken_state {
522 BROKEN_ACQUIRE,
523 BROKEN_ACQUIRED,
524 BROKEN_CONTENDED,
525 BROKEN_RELEASE,
526 BROKEN_MAX,
527 };
528
529 static int bad_hist[BROKEN_MAX];
530
531 enum acquire_flags {
532 TRY_LOCK = 1,
533 READ_LOCK = 2,
534 };
535
536 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
537 {
538 switch (aggr_mode) {
539 case LOCK_AGGR_ADDR:
540 *key = addr;
541 break;
542 case LOCK_AGGR_TASK:
543 *key = tid;
544 break;
545 case LOCK_AGGR_CALLER:
546 case LOCK_AGGR_CGROUP:
547 default:
548 pr_err("Invalid aggregation mode: %d\n", aggr_mode);
549 return -EINVAL;
550 }
551 return 0;
552 }
553
554 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
555
556 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
557 struct perf_sample *sample)
558 {
559 if (aggr_mode == LOCK_AGGR_CALLER) {
560 *key = callchain_id(evsel, sample);
561 return 0;
562 }
563 return get_key_by_aggr_mode_simple(key, addr, sample->tid);
564 }
565
566 static int report_lock_acquire_event(struct evsel *evsel,
567 struct perf_sample *sample)
568 {
569 struct lock_stat *ls;
570 struct thread_stat *ts;
571 struct lock_seq_stat *seq;
572 const char *name = evsel__strval(evsel, sample, "name");
573 u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
574 int flag = evsel__intval(evsel, sample, "flags");
575 u64 key;
576 int ret;
577
578 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
579 if (ret < 0)
580 return ret;
581
582 ls = lock_stat_findnew(key, name, 0);
583 if (!ls)
584 return -ENOMEM;
585
586 ts = thread_stat_findnew(sample->tid);
587 if (!ts)
588 return -ENOMEM;
589
590 seq = get_seq(ts, addr);
591 if (!seq)
592 return -ENOMEM;
593
594 switch (seq->state) {
595 case SEQ_STATE_UNINITIALIZED:
596 case SEQ_STATE_RELEASED:
597 if (!flag) {
598 seq->state = SEQ_STATE_ACQUIRING;
599 } else {
600 if (flag & TRY_LOCK)
601 ls->nr_trylock++;
602 if (flag & READ_LOCK)
603 ls->nr_readlock++;
604 seq->state = SEQ_STATE_READ_ACQUIRED;
605 seq->read_count = 1;
606 ls->nr_acquired++;
607 }
608 break;
609 case SEQ_STATE_READ_ACQUIRED:
610 if (flag & READ_LOCK) {
611 seq->read_count++;
612 ls->nr_acquired++;
613 goto end;
614 } else {
615 goto broken;
616 }
617 break;
618 case SEQ_STATE_ACQUIRED:
619 case SEQ_STATE_ACQUIRING:
620 case SEQ_STATE_CONTENDED:
621 broken:
622 /* broken lock sequence */
623 if (!ls->broken) {
624 ls->broken = 1;
625 bad_hist[BROKEN_ACQUIRE]++;
626 }
627 list_del_init(&seq->list);
628 free(seq);
629 goto end;
630 default:
631 BUG_ON("Unknown state of lock sequence found!\n");
632 break;
633 }
634
635 ls->nr_acquire++;
636 seq->prev_event_time = sample->time;
637 end:
638 return 0;
639 }
640
641 static int report_lock_acquired_event(struct evsel *evsel,
642 struct perf_sample *sample)
643 {
644 struct lock_stat *ls;
645 struct thread_stat *ts;
646 struct lock_seq_stat *seq;
647 u64 contended_term;
648 const char *name = evsel__strval(evsel, sample, "name");
649 u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
650 u64 key;
651 int ret;
652
653 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
654 if (ret < 0)
655 return ret;
656
657 ls = lock_stat_findnew(key, name, 0);
658 if (!ls)
659 return -ENOMEM;
660
661 ts = thread_stat_findnew(sample->tid);
662 if (!ts)
663 return -ENOMEM;
664
665 seq = get_seq(ts, addr);
666 if (!seq)
667 return -ENOMEM;
668
669 switch (seq->state) {
670 case SEQ_STATE_UNINITIALIZED:
671 /* orphan event, do nothing */
672 return 0;
673 case SEQ_STATE_ACQUIRING:
674 break;
675 case SEQ_STATE_CONTENDED:
676 contended_term = sample->time - seq->prev_event_time;
677 ls->wait_time_total += contended_term;
678 if (contended_term < ls->wait_time_min)
679 ls->wait_time_min = contended_term;
680 if (ls->wait_time_max < contended_term)
681 ls->wait_time_max = contended_term;
682 break;
683 case SEQ_STATE_RELEASED:
684 case SEQ_STATE_ACQUIRED:
685 case SEQ_STATE_READ_ACQUIRED:
686 /* broken lock sequence */
687 if (!ls->broken) {
688 ls->broken = 1;
689 bad_hist[BROKEN_ACQUIRED]++;
690 }
691 list_del_init(&seq->list);
692 free(seq);
693 goto end;
694 default:
695 BUG_ON("Unknown state of lock sequence found!\n");
696 break;
697 }
698
699 seq->state = SEQ_STATE_ACQUIRED;
700 ls->nr_acquired++;
701 ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
702 seq->prev_event_time = sample->time;
703 end:
704 return 0;
705 }
706
707 static int report_lock_contended_event(struct evsel *evsel,
708 struct perf_sample *sample)
709 {
710 struct lock_stat *ls;
711 struct thread_stat *ts;
712 struct lock_seq_stat *seq;
713 const char *name = evsel__strval(evsel, sample, "name");
714 u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
715 u64 key;
716 int ret;
717
718 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
719 if (ret < 0)
720 return ret;
721
722 ls = lock_stat_findnew(key, name, 0);
723 if (!ls)
724 return -ENOMEM;
725
726 ts = thread_stat_findnew(sample->tid);
727 if (!ts)
728 return -ENOMEM;
729
730 seq = get_seq(ts, addr);
731 if (!seq)
732 return -ENOMEM;
733
734 switch (seq->state) {
735 case SEQ_STATE_UNINITIALIZED:
736 /* orphan event, do nothing */
737 return 0;
738 case SEQ_STATE_ACQUIRING:
739 break;
740 case SEQ_STATE_RELEASED:
741 case SEQ_STATE_ACQUIRED:
742 case SEQ_STATE_READ_ACQUIRED:
743 case SEQ_STATE_CONTENDED:
744 /* broken lock sequence */
745 if (!ls->broken) {
746 ls->broken = 1;
747 bad_hist[BROKEN_CONTENDED]++;
748 }
749 list_del_init(&seq->list);
750 free(seq);
751 goto end;
752 default:
753 BUG_ON("Unknown state of lock sequence found!\n");
754 break;
755 }
756
757 seq->state = SEQ_STATE_CONTENDED;
758 ls->nr_contended++;
759 ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
760 seq->prev_event_time = sample->time;
761 end:
762 return 0;
763 }
764
765 static int report_lock_release_event(struct evsel *evsel,
766 struct perf_sample *sample)
767 {
768 struct lock_stat *ls;
769 struct thread_stat *ts;
770 struct lock_seq_stat *seq;
771 const char *name = evsel__strval(evsel, sample, "name");
772 u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
773 u64 key;
774 int ret;
775
776 ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
777 if (ret < 0)
778 return ret;
779
780 ls = lock_stat_findnew(key, name, 0);
781 if (!ls)
782 return -ENOMEM;
783
784 ts = thread_stat_findnew(sample->tid);
785 if (!ts)
786 return -ENOMEM;
787
788 seq = get_seq(ts, addr);
789 if (!seq)
790 return -ENOMEM;
791
792 switch (seq->state) {
793 case SEQ_STATE_UNINITIALIZED:
794 goto end;
795 case SEQ_STATE_ACQUIRED:
796 break;
797 case SEQ_STATE_READ_ACQUIRED:
798 seq->read_count--;
799 BUG_ON(seq->read_count < 0);
800 if (seq->read_count) {
801 ls->nr_release++;
802 goto end;
803 }
804 break;
805 case SEQ_STATE_ACQUIRING:
806 case SEQ_STATE_CONTENDED:
807 case SEQ_STATE_RELEASED:
808 /* broken lock sequence */
809 if (!ls->broken) {
810 ls->broken = 1;
811 bad_hist[BROKEN_RELEASE]++;
812 }
813 goto free_seq;
814 default:
815 BUG_ON("Unknown state of lock sequence found!\n");
816 break;
817 }
818
819 ls->nr_release++;
820 free_seq:
821 list_del_init(&seq->list);
822 free(seq);
823 end:
824 return 0;
825 }
826
827 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
828 char *buf, int size)
829 {
830 u64 offset;
831
832 if (map == NULL || sym == NULL) {
833 buf[0] = '\0';
834 return 0;
835 }
836
837 offset = map__map_ip(map, ip) - sym->start;
838
839 if (offset)
840 return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
841 else
842 return strlcpy(buf, sym->name, size);
843 }
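/*
 * For example, an ip 0x48 bytes into queued_spin_lock_slowpath is
 * rendered as "queued_spin_lock_slowpath+0x48"; with a zero offset
 * only the plain symbol name is copied.
 */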
844 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
845 char *buf, int size)
846 {
847 struct thread *thread;
848 struct callchain_cursor *cursor;
849 struct machine *machine = &session->machines.host;
850 struct symbol *sym;
851 int skip = 0;
852 int ret;
853
854 /* lock names will be replaced with task names later */
855 if (show_thread_stats)
856 return -1;
857
858 thread = machine__findnew_thread(machine, -1, sample->pid);
859 if (thread == NULL)
860 return -1;
861
862 cursor = get_tls_callchain_cursor();
863
864 /* use caller function name from the callchain */
865 ret = thread__resolve_callchain(thread, cursor, evsel, sample,
866 NULL, NULL, max_stack_depth);
867 if (ret != 0) {
868 thread__put(thread);
869 return -1;
870 }
871
872 callchain_cursor_commit(cursor);
873 thread__put(thread);
874
875 while (true) {
876 struct callchain_cursor_node *node;
877
878 node = callchain_cursor_current(cursor);
879 if (node == NULL)
880 break;
881
882 /* skip first few entries - for lock functions */
883 if (++skip <= stack_skip)
884 goto next;
885
886 sym = node->ms.sym;
887 if (sym && !machine__is_lock_function(machine, node->ip)) {
888 get_symbol_name_offset(node->ms.map, sym, node->ip,
889 buf, size);
890 return 0;
891 }
892
893 next:
894 callchain_cursor_advance(cursor);
895 }
896 return -1;
897 }
898
899 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
900 {
901 struct callchain_cursor *cursor;
902 struct machine *machine = &session->machines.host;
903 struct thread *thread;
904 u64 hash = 0;
905 int skip = 0;
906 int ret;
907
908 thread = machine__findnew_thread(machine, -1, sample->pid);
909 if (thread == NULL)
910 return -1;
911
912 cursor = get_tls_callchain_cursor();
913 /* use caller function name from the callchain */
914 ret = thread__resolve_callchain(thread, cursor, evsel, sample,
915 NULL, NULL, max_stack_depth);
916 thread__put(thread);
917
918 if (ret != 0)
919 return -1;
920
921 callchain_cursor_commit(cursor);
922
923 while (true) {
924 struct callchain_cursor_node *node;
925
926 node = callchain_cursor_current(cursor);
927 if (node == NULL)
928 break;
929
930 /* skip first few entries - for lock functions */
931 if (++skip <= stack_skip)
932 goto next;
933
934 if (node->ms.sym && machine__is_lock_function(machine, node->ip))
935 goto next;
936
937 hash ^= hash_long((unsigned long)node->ip, 64);
938
939 next:
940 callchain_cursor_advance(cursor);
941 }
942 return hash;
943 }
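/*
 * Note that the id is an XOR accumulation of hash_long() over the
 * skipped/filtered callchain ips; XOR is commutative, so two chains
 * with the same set of ips produce the same id regardless of order.
 * The value is used only as an aggregation key, never displayed.
 */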
944
945 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
946 {
947 u64 *callstack;
948 u64 i;
949 int c;
950
951 callstack = calloc(max_stack, sizeof(*callstack));
952 if (callstack == NULL)
953 return NULL;
954
955 for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
956 u64 ip = sample->callchain->ips[i];
957
958 if (ip >= PERF_CONTEXT_MAX)
959 continue;
960
961 callstack[c++] = ip;
962 }
963 return callstack;
964 }
965
966 static int report_lock_contention_begin_event(struct evsel *evsel,
967 struct perf_sample *sample)
968 {
969 struct lock_stat *ls;
970 struct thread_stat *ts;
971 struct lock_seq_stat *seq;
972 u64 addr = evsel__intval(evsel, sample, "lock_addr");
973 unsigned int flags = evsel__intval(evsel, sample, "flags");
974 u64 key;
975 int i, ret;
976 static bool kmap_loaded;
977 struct machine *machine = &session->machines.host;
978 struct map *kmap;
979 struct symbol *sym;
980
981 ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
982 if (ret < 0)
983 return ret;
984
985 if (!kmap_loaded) {
986 unsigned long *addrs;
987
988 /* make sure it loads the kernel map to find lock symbols */
989 map__load(machine__kernel_map(machine));
990 kmap_loaded = true;
991
992 /* convert (kernel) symbols to addresses */
993 for (i = 0; i < filters.nr_syms; i++) {
994 sym = machine__find_kernel_symbol_by_name(machine,
995 filters.syms[i],
996 &kmap);
997 if (sym == NULL) {
998 pr_warning("ignore unknown symbol: %s\n",
999 filters.syms[i]);
1000 continue;
1001 }
1002
1003 addrs = realloc(filters.addrs,
1004 (filters.nr_addrs + 1) * sizeof(*addrs));
1005 if (addrs == NULL) {
1006 pr_warning("memory allocation failure\n");
1007 return -ENOMEM;
1008 }
1009
1010 addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
1011 filters.addrs = addrs;
1012 }
1013 }
1014
1015 ls = lock_stat_find(key);
1016 if (!ls) {
1017 char buf[128];
1018 const char *name = "";
1019
1020 switch (aggr_mode) {
1021 case LOCK_AGGR_ADDR:
1022 sym = machine__find_kernel_symbol(machine, key, &kmap);
1023 if (sym)
1024 name = sym->name;
1025 break;
1026 case LOCK_AGGR_CALLER:
1027 name = buf;
1028 if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
1029 name = "Unknown";
1030 break;
1031 case LOCK_AGGR_CGROUP:
1032 case LOCK_AGGR_TASK:
1033 default:
1034 break;
1035 }
1036
1037 ls = lock_stat_findnew(key, name, flags);
1038 if (!ls)
1039 return -ENOMEM;
1040 }
1041
1042 if (filters.nr_types) {
1043 bool found = false;
1044
1045 for (i = 0; i < filters.nr_types; i++) {
1046 if (flags == filters.types[i]) {
1047 found = true;
1048 break;
1049 }
1050 }
1051
1052 if (!found)
1053 return 0;
1054 }
1055
1056 if (filters.nr_addrs) {
1057 bool found = false;
1058
1059 for (i = 0; i < filters.nr_addrs; i++) {
1060 if (addr == filters.addrs[i]) {
1061 found = true;
1062 break;
1063 }
1064 }
1065
1066 if (!found)
1067 return 0;
1068 }
1069
1070 if (needs_callstack()) {
1071 u64 *callstack = get_callstack(sample, max_stack_depth);
1072 if (callstack == NULL)
1073 return -ENOMEM;
1074
1075 if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
1076 free(callstack);
1077 return 0;
1078 }
1079
1080 if (ls->callstack == NULL)
1081 ls->callstack = callstack;
1082 else
1083 free(callstack);
1084 }
1085
1086 ts = thread_stat_findnew(sample->tid);
1087 if (!ts)
1088 return -ENOMEM;
1089
1090 seq = get_seq(ts, addr);
1091 if (!seq)
1092 return -ENOMEM;
1093
1094 switch (seq->state) {
1095 case SEQ_STATE_UNINITIALIZED:
1096 case SEQ_STATE_ACQUIRED:
1097 break;
1098 case SEQ_STATE_CONTENDED:
1099 /*
1100 * Nested contention_begin events can occur with mutex
1101 * optimistic spinning; in that case keep the original
1102 * contention_begin event and ignore the second one.
1103 */
1104 goto end;
1105 case SEQ_STATE_ACQUIRING:
1106 case SEQ_STATE_READ_ACQUIRED:
1107 case SEQ_STATE_RELEASED:
1108 /* broken lock sequence */
1109 if (!ls->broken) {
1110 ls->broken = 1;
1111 bad_hist[BROKEN_CONTENDED]++;
1112 }
1113 list_del_init(&seq->list);
1114 free(seq);
1115 goto end;
1116 default:
1117 BUG_ON("Unknown state of lock sequence found!\n");
1118 break;
1119 }
1120
1121 if (seq->state != SEQ_STATE_CONTENDED) {
1122 seq->state = SEQ_STATE_CONTENDED;
1123 seq->prev_event_time = sample->time;
1124 ls->nr_contended++;
1125 }
1126 end:
1127 return 0;
1128 }
1129
1130 static int report_lock_contention_end_event(struct evsel *evsel,
1131 struct perf_sample *sample)
1132 {
1133 struct lock_stat *ls;
1134 struct thread_stat *ts;
1135 struct lock_seq_stat *seq;
1136 u64 contended_term;
1137 u64 addr = evsel__intval(evsel, sample, "lock_addr");
1138 u64 key;
1139 int ret;
1140
1141 ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1142 if (ret < 0)
1143 return ret;
1144
1145 ls = lock_stat_find(key);
1146 if (!ls)
1147 return 0;
1148
1149 ts = thread_stat_find(sample->tid);
1150 if (!ts)
1151 return 0;
1152
1153 seq = get_seq(ts, addr);
1154 if (!seq)
1155 return -ENOMEM;
1156
1157 switch (seq->state) {
1158 case SEQ_STATE_UNINITIALIZED:
1159 goto end;
1160 case SEQ_STATE_CONTENDED:
1161 contended_term = sample->time - seq->prev_event_time;
1162 ls->wait_time_total += contended_term;
1163 if (contended_term < ls->wait_time_min)
1164 ls->wait_time_min = contended_term;
1165 if (ls->wait_time_max < contended_term)
1166 ls->wait_time_max = contended_term;
1167 break;
1168 case SEQ_STATE_ACQUIRING:
1169 case SEQ_STATE_ACQUIRED:
1170 case SEQ_STATE_READ_ACQUIRED:
1171 case SEQ_STATE_RELEASED:
1172 /* broken lock sequence */
1173 if (!ls->broken) {
1174 ls->broken = 1;
1175 bad_hist[BROKEN_ACQUIRED]++;
1176 }
1177 list_del_init(&seq->list);
1178 free(seq);
1179 goto end;
1180 default:
1181 BUG_ON("Unknown state of lock sequence found!\n");
1182 break;
1183 }
1184
1185 seq->state = SEQ_STATE_ACQUIRED;
1186 ls->nr_acquired++;
1187 ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1188 end:
1189 return 0;
1190 }
1191
1192 /* lock-oriented handlers */
1193 /* TODO: handlers for CPU-oriented and thread-oriented modes */
1194 static struct trace_lock_handler report_lock_ops = {
1195 .acquire_event = report_lock_acquire_event,
1196 .acquired_event = report_lock_acquired_event,
1197 .contended_event = report_lock_contended_event,
1198 .release_event = report_lock_release_event,
1199 .contention_begin_event = report_lock_contention_begin_event,
1200 .contention_end_event = report_lock_contention_end_event,
1201 };
1202
1203 static struct trace_lock_handler contention_lock_ops = {
1204 .contention_begin_event = report_lock_contention_begin_event,
1205 .contention_end_event = report_lock_contention_end_event,
1206 };
1207
1208
1209 static struct trace_lock_handler *trace_handler;
1210
1211 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1212 {
1213 if (trace_handler->acquire_event)
1214 return trace_handler->acquire_event(evsel, sample);
1215 return 0;
1216 }
1217
1218 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1219 {
1220 if (trace_handler->acquired_event)
1221 return trace_handler->acquired_event(evsel, sample);
1222 return 0;
1223 }
1224
1225 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1226 {
1227 if (trace_handler->contended_event)
1228 return trace_handler->contended_event(evsel, sample);
1229 return 0;
1230 }
1231
1232 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1233 {
1234 if (trace_handler->release_event)
1235 return trace_handler->release_event(evsel, sample);
1236 return 0;
1237 }
1238
1239 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1240 {
1241 if (trace_handler->contention_begin_event)
1242 return trace_handler->contention_begin_event(evsel, sample);
1243 return 0;
1244 }
1245
1246 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1247 {
1248 if (trace_handler->contention_end_event)
1249 return trace_handler->contention_end_event(evsel, sample);
1250 return 0;
1251 }
1252
1253 static void print_bad_events(int bad, int total)
1254 {
1255 /* Output for debugging; this has to be removed */
1256 int i;
1257 int broken = 0;
1258 const char *name[4] =
1259 { "acquire", "acquired", "contended", "release" };
1260
1261 for (i = 0; i < BROKEN_MAX; i++)
1262 broken += bad_hist[i];
1263
1264 if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1265 return;
1266
1267 fprintf(lock_output, "\n=== output for debug ===\n\n");
1268 fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1269 fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1270 fprintf(lock_output, "histogram of events caused bad sequence\n");
1271 for (i = 0; i < BROKEN_MAX; i++)
1272 fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1273 }
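/*
 * A sketch of the debug block this prints (numbers are made up):
 *
 *   === output for debug ===
 *
 *   bad: 5, total: 100
 *   bad rate: 5.00 %
 *   histogram of events caused bad sequence
 *       acquire: 2
 *      acquired: 1
 *     contended: 2
 *       release: 0
 */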
1274
1275 /* TODO: various ways to print, coloring, nano or milli sec */
1276 static void print_result(void)
1277 {
1278 struct lock_stat *st;
1279 struct lock_key *key;
1280 char cut_name[20];
1281 int bad, total, printed;
1282
1283 if (!quiet) {
1284 fprintf(lock_output, "%20s ", "Name");
1285 list_for_each_entry(key, &lock_keys, list)
1286 fprintf(lock_output, "%*s ", key->len, key->header);
1287 fprintf(lock_output, "\n\n");
1288 }
1289
1290 bad = total = printed = 0;
1291 while ((st = pop_from_result())) {
1292 total++;
1293 if (st->broken)
1294 bad++;
1295 if (!st->nr_acquired)
1296 continue;
1297
1298 bzero(cut_name, 20);
1299
1300 if (strlen(st->name) < 20) {
1301 /* output raw name */
1302 const char *name = st->name;
1303
1304 if (show_thread_stats) {
1305 struct thread *t;
1306
1307 /* st->addr contains tid of thread */
1308 t = perf_session__findnew(session, st->addr);
1309 name = thread__comm_str(t);
1310 }
1311
1312 fprintf(lock_output, "%20s ", name);
1313 } else {
1314 strncpy(cut_name, st->name, 16);
1315 cut_name[16] = '.';
1316 cut_name[17] = '.';
1317 cut_name[18] = '.';
1318 cut_name[19] = '\0';
1319 /* truncate the name to preserve the output alignment */
1320 fprintf(lock_output, "%20s ", cut_name);
1321 }
1322
1323 list_for_each_entry(key, &lock_keys, list) {
1324 key->print(key, st);
1325 fprintf(lock_output, " ");
1326 }
1327 fprintf(lock_output, "\n");
1328
1329 if (++printed >= print_nr_entries)
1330 break;
1331 }
1332
1333 print_bad_events(bad, total);
1334 }
1335
1336 static bool info_threads, info_map;
1337
1338 static void dump_threads(void)
1339 {
1340 struct thread_stat *st;
1341 struct rb_node *node;
1342 struct thread *t;
1343
1344 fprintf(lock_output, "%10s: comm\n", "Thread ID");
1345
1346 node = rb_first(&thread_stats);
1347 while (node) {
1348 st = container_of(node, struct thread_stat, rb);
1349 t = perf_session__findnew(session, st->tid);
1350 fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1351 node = rb_next(node);
1352 thread__put(t);
1353 }
1354 }
1355
1356 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1357 {
1358 int ret;
1359
1360 if (a->name && b->name)
1361 ret = strcmp(a->name, b->name);
1362 else
1363 ret = !!a->name - !!b->name;
1364
1365 if (!ret)
1366 return a->addr < b->addr;
1367 else
1368 return ret < 0;
1369 }
1370
1371 static void dump_map(void)
1372 {
1373 unsigned int i;
1374 struct lock_stat *st;
1375
1376 fprintf(lock_output, "Address of instance: name of class\n");
1377 for (i = 0; i < LOCKHASH_SIZE; i++) {
1378 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1379 insert_to_result(st, compare_maps);
1380 }
1381 }
1382
1383 while ((st = pop_from_result()))
1384 fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1385 }
1386
1387 static void dump_info(void)
1388 {
1389 if (info_threads)
1390 dump_threads();
1391
1392 if (info_map) {
1393 if (info_threads)
1394 fputc('\n', lock_output);
1395 dump_map();
1396 }
1397 }
1398
1399 static const struct evsel_str_handler lock_tracepoints[] = {
1400 { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
1401 { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1402 { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1403 { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
1404 };
1405
1406 static const struct evsel_str_handler contention_tracepoints[] = {
1407 { "lock:contention_begin", evsel__process_contention_begin, },
1408 { "lock:contention_end", evsel__process_contention_end, },
1409 };
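/*
 * Illustrative flow feeding these handlers (assuming a kernel that
 * exposes the tracepoints above):
 *
 *   perf lock record -- <workload>   # sample the tracepoints
 *   perf lock report                 # lockdep-based handlers
 *   perf lock contention             # contention_begin/end handlers
 *
 * With -b/--use-bpf, 'perf lock contention' skips the record step and
 * aggregates in the kernel via BPF instead.
 */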
1410
1411 static int process_event_update(const struct perf_tool *tool,
1412 union perf_event *event,
1413 struct evlist **pevlist)
1414 {
1415 int ret;
1416
1417 ret = perf_event__process_event_update(tool, event, pevlist);
1418 if (ret < 0)
1419 return ret;
1420
1421 /* this can return -EEXIST since we call it for each evsel */
1422 perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1423 perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1424 return 0;
1425 }
1426
1427 typedef int (*tracepoint_handler)(struct evsel *evsel,
1428 struct perf_sample *sample);
1429
1430 static int process_sample_event(const struct perf_tool *tool __maybe_unused,
1431 union perf_event *event,
1432 struct perf_sample *sample,
1433 struct evsel *evsel,
1434 struct machine *machine)
1435 {
1436 int err = 0;
1437 struct thread *thread = machine__findnew_thread(machine, sample->pid,
1438 sample->tid);
1439
1440 if (thread == NULL) {
1441 pr_debug("problem processing %d event, skipping it.\n",
1442 event->header.type);
1443 return -1;
1444 }
1445
1446 if (evsel->handler != NULL) {
1447 tracepoint_handler f = evsel->handler;
1448 err = f(evsel, sample);
1449 }
1450
1451 thread__put(thread);
1452
1453 return err;
1454 }
1455
1456 static void combine_result(void)
1457 {
1458 unsigned int i;
1459 struct lock_stat *st;
1460
1461 if (!combine_locks)
1462 return;
1463
1464 for (i = 0; i < LOCKHASH_SIZE; i++) {
1465 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1466 combine_lock_stats(st);
1467 }
1468 }
1469 }
1470
1471 static void sort_result(void)
1472 {
1473 unsigned int i;
1474 struct lock_stat *st;
1475
1476 for (i = 0; i < LOCKHASH_SIZE; i++) {
1477 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1478 insert_to_result(st, compare);
1479 }
1480 }
1481 }
1482
1483 static const struct {
1484 unsigned int flags;
1485 /*
1486 * Name of the lock flags (access), with delimiter ':'.
1487 * For example, rwsem:R or rwsem:W.
1488 */
1489 const char *flags_name;
1490 /* Name of the lock (type), for example, rwlock or rwsem. */
1491 const char *lock_name;
1492 } lock_type_table[] = {
1493 { 0, "semaphore", "semaphore" },
1494 { LCB_F_SPIN, "spinlock", "spinlock" },
1495 { LCB_F_SPIN | LCB_F_READ, "rwlock:R", "rwlock" },
1496 { LCB_F_SPIN | LCB_F_WRITE, "rwlock:W", "rwlock" },
1497 { LCB_F_READ, "rwsem:R", "rwsem" },
1498 { LCB_F_WRITE, "rwsem:W", "rwsem" },
1499 { LCB_F_RT, "rt-mutex", "rt-mutex" },
1500 { LCB_F_RT | LCB_F_READ, "rwlock-rt:R", "rwlock-rt" },
1501 { LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" },
1502 { LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" },
1503 { LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" },
1504 { LCB_F_MUTEX, "mutex", "mutex" },
1505 { LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" },
1506 /* alias for optimistic spinning only */
1507 { LCB_F_MUTEX | LCB_F_SPIN, "mutex:spin", "mutex-spin" },
1508 };
1509
1510 static const char *get_type_flags_name(unsigned int flags)
1511 {
1512 flags &= LCB_F_TYPE_MASK;
1513
1514 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1515 if (lock_type_table[i].flags == flags)
1516 return lock_type_table[i].flags_name;
1517 }
1518 return "unknown";
1519 }
1520
1521 static const char *get_type_lock_name(unsigned int flags)
1522 {
1523 flags &= LCB_F_TYPE_MASK;
1524
1525 for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1526 if (lock_type_table[i].flags == flags)
1527 return lock_type_table[i].lock_name;
1528 }
1529 return "unknown";
1530 }
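/*
 * Lookup examples: flags of LCB_F_SPIN | LCB_F_READ resolve to
 * "rwlock:R" via get_type_flags_name() and to "rwlock" via
 * get_type_lock_name().  Bits outside LCB_F_TYPE_MASK are masked off
 * first, and unmatched combinations fall back to "unknown".
 */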
1531
1532 static void lock_filter_finish(void)
1533 {
1534 zfree(&filters.types);
1535 filters.nr_types = 0;
1536
1537 zfree(&filters.addrs);
1538 filters.nr_addrs = 0;
1539
1540 for (int i = 0; i < filters.nr_syms; i++)
1541 free(filters.syms[i]);
1542
1543 zfree(&filters.syms);
1544 filters.nr_syms = 0;
1545
1546 zfree(&filters.cgrps);
1547 filters.nr_cgrps = 0;
1548
1549 for (int i = 0; i < filters.nr_slabs; i++)
1550 free(filters.slabs[i]);
1551
1552 zfree(&filters.slabs);
1553 filters.nr_slabs = 0;
1554 }
1555
1556 static void sort_contention_result(void)
1557 {
1558 sort_result();
1559 }
1560
1561 static void print_header_stdio(void)
1562 {
1563 struct lock_key *key;
1564
1565 list_for_each_entry(key, &lock_keys, list)
1566 fprintf(lock_output, "%*s ", key->len, key->header);
1567
1568 switch (aggr_mode) {
1569 case LOCK_AGGR_TASK:
1570 fprintf(lock_output, " %10s %s\n\n", "pid",
1571 show_lock_owner ? "owner" : "comm");
1572 break;
1573 case LOCK_AGGR_CALLER:
1574 fprintf(lock_output, " %10s %s\n\n", "type", "caller");
1575 break;
1576 case LOCK_AGGR_ADDR:
1577 fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
1578 break;
1579 case LOCK_AGGR_CGROUP:
1580 fprintf(lock_output, " %s\n\n", "cgroup");
1581 break;
1582 default:
1583 break;
1584 }
1585 }
1586
1587 static void print_header_csv(const char *sep)
1588 {
1589 struct lock_key *key;
1590
1591 fprintf(lock_output, "# output: ");
1592 list_for_each_entry(key, &lock_keys, list)
1593 fprintf(lock_output, "%s%s ", key->header, sep);
1594
1595 switch (aggr_mode) {
1596 case LOCK_AGGR_TASK:
1597 fprintf(lock_output, "%s%s %s\n", "pid", sep,
1598 show_lock_owner ? "owner" : "comm");
1599 break;
1600 case LOCK_AGGR_CALLER:
1601 fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1602 if (verbose > 0)
1603 fprintf(lock_output, "%s %s", sep, "stacktrace");
1604 fprintf(lock_output, "\n");
1605 break;
1606 case LOCK_AGGR_ADDR:
1607 fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1608 break;
1609 case LOCK_AGGR_CGROUP:
1610 fprintf(lock_output, "%s\n", "cgroup");
1611 break;
1612 default:
1613 break;
1614 }
1615 }
1616
1617 static void print_header(void)
1618 {
1619 if (!quiet) {
1620 if (symbol_conf.field_sep)
1621 print_header_csv(symbol_conf.field_sep);
1622 else
1623 print_header_stdio();
1624 }
1625 }
1626
1627 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1628 {
1629 struct lock_key *key;
1630 struct thread *t;
1631 int pid;
1632
1633 list_for_each_entry(key, &lock_keys, list) {
1634 key->print(key, st);
1635 fprintf(lock_output, " ");
1636 }
1637
1638 switch (aggr_mode) {
1639 case LOCK_AGGR_CALLER:
1640 fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name);
1641 break;
1642 case LOCK_AGGR_TASK:
1643 pid = st->addr;
1644 t = perf_session__findnew(session, pid);
1645 fprintf(lock_output, " %10d %s\n",
1646 pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1647 break;
1648 case LOCK_AGGR_ADDR:
1649 fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
1650 st->name, get_type_lock_name(st->flags));
1651 break;
1652 case LOCK_AGGR_CGROUP:
1653 fprintf(lock_output, " %s\n", st->name);
1654 break;
1655 default:
1656 break;
1657 }
1658
1659 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1660 struct map *kmap;
1661 struct symbol *sym;
1662 char buf[128];
1663 u64 ip;
1664
1665 for (int i = 0; i < max_stack_depth; i++) {
1666 if (!st->callstack || !st->callstack[i])
1667 break;
1668
1669 ip = st->callstack[i];
1670 sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1671 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1672 fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
1673 }
1674 }
1675 }
1676
1677 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1678 const char *sep)
1679 {
1680 struct lock_key *key;
1681 struct thread *t;
1682 int pid;
1683
1684 list_for_each_entry(key, &lock_keys, list) {
1685 key->print(key, st);
1686 fprintf(lock_output, "%s ", sep);
1687 }
1688
1689 switch (aggr_mode) {
1690 case LOCK_AGGR_CALLER:
1691 fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
1692 if (verbose <= 0)
1693 fprintf(lock_output, "\n");
1694 break;
1695 case LOCK_AGGR_TASK:
1696 pid = st->addr;
1697 t = perf_session__findnew(session, pid);
1698 fprintf(lock_output, "%d%s %s\n", pid, sep,
1699 pid == -1 ? "Unknown" : thread__comm_str(t));
1700 break;
1701 case LOCK_AGGR_ADDR:
1702 fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1703 st->name, sep, get_type_lock_name(st->flags));
1704 break;
1705 case LOCK_AGGR_CGROUP:
1706 fprintf(lock_output, "%s\n", st->name);
1707 break;
1708 default:
1709 break;
1710 }
1711
1712 if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1713 struct map *kmap;
1714 struct symbol *sym;
1715 char buf[128];
1716 u64 ip;
1717
1718 for (int i = 0; i < max_stack_depth; i++) {
1719 if (!st->callstack || !st->callstack[i])
1720 break;
1721
1722 ip = st->callstack[i];
1723 sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1724 get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1725 fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1726 }
1727 fprintf(lock_output, "\n");
1728 }
1729 }
1730
1731 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1732 {
1733 if (symbol_conf.field_sep)
1734 print_lock_stat_csv(con, st, symbol_conf.field_sep);
1735 else
1736 print_lock_stat_stdio(con, st);
1737 }
1738
1739 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1740 {
1741 /* Output for debugging; this has to be removed */
1742 int broken = fails->task + fails->stack + fails->time + fails->data;
1743
1744 if (!use_bpf)
1745 print_bad_events(bad, total);
1746
1747 if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1748 return;
1749
1750 total += broken;
1751 fprintf(lock_output, "\n=== output for debug ===\n\n");
1752 fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1753 fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1754
1755 fprintf(lock_output, "histogram of failure reasons\n");
1756 fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1757 fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1758 fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1759 fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1760 }
1761
1762 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1763 const char *sep)
1764 {
1765 /* Output for debugging; this has to be removed */
1766 if (use_bpf)
1767 bad = fails->task + fails->stack + fails->time + fails->data;
1768
1769 if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1770 return;
1771
1772 total += bad;
1773 fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1774
1775 if (use_bpf) {
1776 fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1777 fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1778 fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1779 fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1780 } else {
1781 int i;
1782 const char *name[4] = { "acquire", "acquired", "contended", "release" };
1783
1784 for (i = 0; i < BROKEN_MAX; i++)
1785 fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1786 }
1787 fprintf(lock_output, "\n");
1788 }
1789
1790 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1791 {
1792 if (symbol_conf.field_sep)
1793 print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1794 else
1795 print_footer_stdio(total, bad, fails);
1796 }
1797
1798 static void print_contention_result(struct lock_contention *con)
1799 {
1800 struct lock_stat *st;
1801 int bad, total, printed;
1802
1803 if (!quiet)
1804 print_header();
1805
1806 bad = total = printed = 0;
1807
1808 while ((st = pop_from_result())) {
1809 total += use_bpf ? st->nr_contended : 1;
1810 if (st->broken)
1811 bad++;
1812
1813 if (!st->wait_time_total)
1814 continue;
1815
1816 print_lock_stat(con, st);
1817
1818 if (++printed >= print_nr_entries)
1819 break;
1820 }
1821
1822 if (con->owner && con->save_callstack && verbose > 0) {
1823 struct rb_root root = RB_ROOT;
1824
1825 if (symbol_conf.field_sep)
1826 fprintf(lock_output, "# owner stack trace:\n");
1827 else
1828 fprintf(lock_output, "\n=== owner stack trace ===\n\n");
1829 while ((st = pop_owner_stack_trace(con)))
1830 insert_to(&root, st, compare);
1831
1832 while ((st = pop_from(&root))) {
1833 print_lock_stat(con, st);
1834 free(st);
1835 }
1836 }
1837
1838 if (print_nr_entries) {
1839 /* update the total/bad stats */
1840 while ((st = pop_from_result())) {
1841 total += use_bpf ? st->nr_contended : 1;
1842 if (st->broken)
1843 bad++;
1844 }
1845 }
1846 /* some entries are collected but hidden by the callstack filter */
1847 total += con->nr_filtered;
1848
1849 print_footer(total, bad, &con->fails);
1850 }
1851
1852 static bool force;
1853
1854 static int __cmd_report(bool display_info)
1855 {
1856 int err = -EINVAL;
1857 struct perf_tool eops;
1858 struct perf_data data = {
1859 .path = input_name,
1860 .mode = PERF_DATA_MODE_READ,
1861 .force = force,
1862 };
1863
1864 perf_tool__init(&eops, /*ordered_events=*/true);
1865 eops.attr = perf_event__process_attr;
1866 eops.event_update = process_event_update;
1867 eops.sample = process_sample_event;
1868 eops.comm = perf_event__process_comm;
1869 eops.mmap = perf_event__process_mmap;
1870 eops.namespaces = perf_event__process_namespaces;
1871 eops.tracing_data = perf_event__process_tracing_data;
1872 session = perf_session__new(&data, &eops);
1873 if (IS_ERR(session)) {
1874 pr_err("Initializing perf session failed\n");
1875 return PTR_ERR(session);
1876 }
1877
1878 symbol_conf.allow_aliases = true;
1879 symbol__init(perf_session__env(session));
1880
1881 if (!data.is_pipe) {
1882 if (!perf_session__has_traces(session, "lock record"))
1883 goto out_delete;
1884
1885 if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1886 pr_err("Initializing perf session tracepoint handlers failed\n");
1887 goto out_delete;
1888 }
1889
1890 if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1891 pr_err("Initializing perf session tracepoint handlers failed\n");
1892 goto out_delete;
1893 }
1894 }
1895
1896 if (setup_output_field(false, output_fields))
1897 goto out_delete;
1898
1899 if (select_key(false))
1900 goto out_delete;
1901
1902 if (show_thread_stats)
1903 aggr_mode = LOCK_AGGR_TASK;
1904
1905 err = perf_session__process_events(session);
1906 if (err)
1907 goto out_delete;
1908
1909 setup_pager();
1910 if (display_info) /* used for info subcommand */
1911 dump_info();
1912 else {
1913 combine_result();
1914 sort_result();
1915 print_result();
1916 }
1917
1918 out_delete:
1919 perf_session__delete(session);
1920 return err;
1921 }
1922
1923 static void sighandler(int sig __maybe_unused)
1924 {
1925 }
1926
1927 static int check_lock_contention_options(const struct option *options,
1928 const char * const *usage)
1930 {
1931 if (show_thread_stats && show_lock_addrs) {
1932 pr_err("Cannot use thread and addr mode together\n");
1933 parse_options_usage(usage, options, "threads", 0);
1934 parse_options_usage(NULL, options, "lock-addr", 0);
1935 return -1;
1936 }
1937
1938 if (show_lock_owner && !use_bpf) {
1939 pr_err("Lock owners are available only with BPF\n");
1940 parse_options_usage(usage, options, "lock-owner", 0);
1941 parse_options_usage(NULL, options, "use-bpf", 0);
1942 return -1;
1943 }
1944
1945 if (show_lock_owner && show_lock_addrs) {
1946 pr_err("Cannot use owner and addr mode together\n");
1947 parse_options_usage(usage, options, "lock-owner", 0);
1948 parse_options_usage(NULL, options, "lock-addr", 0);
1949 return -1;
1950 }
1951
1952 if (show_lock_cgroups && !use_bpf) {
1953 pr_err("Cgroups are available only with BPF\n");
1954 parse_options_usage(usage, options, "lock-cgroup", 0);
1955 parse_options_usage(NULL, options, "use-bpf", 0);
1956 return -1;
1957 }
1958
1959 if (show_lock_cgroups && show_lock_addrs) {
1960 pr_err("Cannot use cgroup and addr mode together\n");
1961 parse_options_usage(usage, options, "lock-cgroup", 0);
1962 parse_options_usage(NULL, options, "lock-addr", 0);
1963 return -1;
1964 }
1965
1966 if (show_lock_cgroups && show_thread_stats) {
1967 pr_err("Cannot use cgroup and thread mode together\n");
1968 parse_options_usage(usage, options, "lock-cgroup", 0);
1969 parse_options_usage(NULL, options, "threads", 0);
1970 return -1;
1971 }
1972
1973 if (symbol_conf.field_sep) {
1974 if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1975 strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1976 strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1977 pr_err("Cannot use the separator that is already used\n");
1978 parse_options_usage(usage, options, "x", 1);
1979 return -1;
1980 }
1981 }
1982
1983 if (show_lock_owner && !show_thread_stats) {
1984 pr_warning("Now -o try to show owner's callstack instead of pid and comm.\n");
1985 pr_warning("Please use -t option too to keep the old behavior.\n");
1986 }
1987
1988 return 0;
1989 }
1990
1991 static int __cmd_contention(int argc, const char **argv)
1992 {
1993 int err = -EINVAL;
1994 struct perf_tool eops;
1995 struct perf_data data = {
1996 .path = input_name,
1997 .mode = PERF_DATA_MODE_READ,
1998 .force = force,
1999 };
2000 struct lock_contention con = {
2001 .target = &target,
2002 .map_nr_entries = bpf_map_entries,
2003 .max_stack = max_stack_depth,
2004 .stack_skip = stack_skip,
2005 .filters = &filters,
2006 .delays = delays,
2007 .nr_delays = nr_delays,
2008 .save_callstack = needs_callstack(),
2009 .owner = show_lock_owner,
2010 .cgroups = RB_ROOT,
2011 };
2012
2013 lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2014 if (!lockhash_table)
2015 return -ENOMEM;
2016
2017 con.result = &lockhash_table[0];
2018
2019 perf_tool__init(&eops, /*ordered_events=*/true);
2020 eops.attr = perf_event__process_attr;
2021 eops.event_update = process_event_update;
2022 eops.sample = process_sample_event;
2023 eops.comm = perf_event__process_comm;
2024 eops.mmap = perf_event__process_mmap;
2025 eops.tracing_data = perf_event__process_tracing_data;
2026
2027 session = perf_session__new(use_bpf ? NULL : &data, &eops);
2028 if (IS_ERR(session)) {
2029 pr_err("Initializing perf session failed\n");
2030 err = PTR_ERR(session);
2031 session = NULL;
2032 goto out_delete;
2033 }
2034
2035 con.machine = &session->machines.host;
2036
2037 con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2038 show_lock_addrs ? LOCK_AGGR_ADDR :
2039 show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2040
2041 if (con.aggr_mode == LOCK_AGGR_CALLER)
2042 con.save_callstack = true;
2043
2044 symbol_conf.allow_aliases = true;
2045 symbol__init(perf_session__env(session));
2046
2047 if (use_bpf) {
2048 err = target__validate(&target);
2049 if (err) {
2050 char errbuf[512];
2051
2052 target__strerror(&target, err, errbuf, 512);
2053 pr_err("%s\n", errbuf);
2054 goto out_delete;
2055 }
2056
2057 signal(SIGINT, sighandler);
2058 signal(SIGCHLD, sighandler);
2059 signal(SIGTERM, sighandler);
2060
2061 con.evlist = evlist__new();
2062 if (con.evlist == NULL) {
2063 err = -ENOMEM;
2064 goto out_delete;
2065 }
2066
2067 err = evlist__create_maps(con.evlist, &target);
2068 if (err < 0)
2069 goto out_delete;
2070
2071 if (argc) {
2072 err = evlist__prepare_workload(con.evlist, &target,
2073 argv, false, NULL);
2074 if (err < 0)
2075 goto out_delete;
2076 }
2077
2078 err = lock_contention_prepare(&con);
2079 if (err < 0) {
2080 pr_err("lock contention BPF setup failed\n");
2081 goto out_delete;
2082 }
2083 } else if (!data.is_pipe) {
2084 if (!perf_session__has_traces(session, "lock record"))
2085 goto out_delete;
2086
2087 if (!evlist__find_evsel_by_str(session->evlist,
2088 "lock:contention_begin")) {
2089 pr_err("lock contention evsel not found\n");
2090 goto out_delete;
2091 }
2092
2093 if (perf_session__set_tracepoints_handlers(session,
2094 contention_tracepoints)) {
2095 pr_err("Initializing perf session tracepoint handlers failed\n");
2096 goto out_delete;
2097 }
2098 }
2099
2100 err = setup_output_field(true, output_fields);
2101 if (err) {
2102 pr_err("Failed to setup output field\n");
2103 goto out_delete;
2104 }
2105
2106 err = select_key(true);
2107 if (err)
2108 goto out_delete;
2109
2110 if (symbol_conf.field_sep) {
2111 int i;
2112 struct lock_key *keys = contention_keys;
2113
2114 /* do not align output in CSV format */
2115 for (i = 0; keys[i].name; i++)
2116 keys[i].len = 0;
2117 }
2118
2119 if (use_bpf) {
2120 lock_contention_start();
2121 if (argc)
2122 evlist__start_workload(con.evlist);
2123
2124 /* wait for signal */
2125 pause();
2126
2127 lock_contention_stop();
2128 lock_contention_read(&con);
2129 } else {
2130 err = perf_session__process_events(session);
2131 if (err)
2132 goto out_delete;
2133 }
2134
2135 setup_pager();
2136
2137 sort_contention_result();
2138 print_contention_result(&con);
2139
2140 out_delete:
2141 lock_filter_finish();
2142 evlist__delete(con.evlist);
2143 lock_contention_finish(&con);
2144 perf_session__delete(session);
2145 zfree(&lockhash_table);
2146 return err;
2147 }
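
/*
 * 'perf lock record': build an argv for 'perf record' that enables the
 * lock tracepoints when CONFIG_LOCKDEP/CONFIG_LOCK_STAT are available,
 * or falls back to the contention tracepoints (plus frame-pointer
 * callgraph options) otherwise.
 */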
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			? lock_tracepoints[j].name
			: contention_tracepoints[j].name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

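/*
 * Option callback for -M/--map-nr-entries: parse a number into the
 * unsigned long pointed to by opt->value (bpf_map_entries).
 */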
static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

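/*
 * Option callback for --max-stack: opt->value points to the int
 * max_stack_depth, so store through an int pointer and bound the value
 * by sysctl__max_stack().
 */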
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	int *len = (int *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

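/* Append a lock-flags value to the type filter list. */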
static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}

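/*
 * Option callback for -Y/--type-filter.  Accepts a comma/space
 * separated list of either flags names (containing ':') or lock names
 * as found in lock_type_table, e.g. "percpu-rwsem" (alias "pcpu-sem").
 */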
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * A single lock name can map to multiple flags.
		 * Replace the alias `pcpu-sem` with the actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}
	}

	free(s);
	return 0;
}

static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

static bool add_lock_slab(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_slabs++] = sym;
	filters.slabs = tmp;
	return true;
}

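/*
 * Option callback for -L/--lock-filter.  Each token is either a hex
 * address, a slab cache name prefixed with '&', or a symbol name to be
 * resolved to an address later (kernel symbols are not loaded yet).
 */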
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		addr = strtoul(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		if (*tok == '&') {
			if (!add_lock_slab(tok + 1)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols.  Save the symbols
		 * in a separate list and resolve them to addresses later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

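/*
 * Option callback for --output: open the file for writing and remember
 * its name in output_name via opt->value.
 */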
static int parse_output(const struct option *opt, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (str == NULL)
		return -1;

	lock_output = fopen(str, "w");
	if (lock_output == NULL) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}

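/*
 * Look up a cgroup by name and append its id to the cgroup filter
 * list used by the BPF program.
 */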
static bool add_lock_cgroup(char *name)
{
	u64 *tmp;
	struct cgroup *cgrp;

	cgrp = cgroup__new(name, /*do_open=*/false);
	if (cgrp == NULL) {
		pr_err("Failed to create cgroup: %s\n", name);
		return false;
	}

	if (read_cgroup_id(cgrp) < 0) {
		pr_err("Failed to read cgroup id for %s\n", name);
		cgroup__put(cgrp);
		return false;
	}

	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		cgroup__put(cgrp);
		return false;
	}

	tmp[filters.nr_cgrps++] = cgrp->id;
	filters.cgrps = tmp;
	cgroup__put(cgrp);
	return true;
}

static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
			       int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_cgroup(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

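/*
 * Parse a single -J/--inject-delay spec of the form TIME@FUNC, where
 * TIME takes an optional ns/us/ms suffix (default ns) and is capped at
 * 10ms, e.g. "100us@<lock-function>" (the symbol name is supplied by
 * the user).
 */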
static bool add_lock_delay(char *spec)
{
	char *at, *pos;
	struct lock_delay *tmp;
	unsigned long duration;

	at = strchr(spec, '@');
	if (at == NULL) {
		pr_err("lock delay should have '@' sign: %s\n", spec);
		return false;
	}
	if (at == spec) {
		pr_err("lock delay should have time before '@': %s\n", spec);
		return false;
	}

	*at = '\0';
	duration = strtoul(spec, &pos, 0);
	if (!strcmp(pos, "ns"))
		duration *= 1;
	else if (!strcmp(pos, "us"))
		duration *= 1000;
	else if (!strcmp(pos, "ms"))
		duration *= 1000 * 1000;
	else if (*pos) {
		pr_err("invalid delay time: %s@%s\n", spec, at + 1);
		return false;
	}

	if (duration > 10 * 1000 * 1000) {
		pr_err("lock delay is too long: %s (> 10ms)\n", spec);
		return false;
	}

	tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}
	delays = tmp;

	delays[nr_delays].sym = strdup(at + 1);
	if (delays[nr_delays].sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}
	delays[nr_delays].time = duration;

	nr_delays++;
	return true;
}

static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str,
			    int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_delay(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

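/*
 * Entry point of 'perf lock': dispatch to the record, report, script,
 * info or contention subcommands.
 */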
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
		OPT_STRING('i', "input", &input_name, "file", "input file name"),
		OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
		OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
		OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
		OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
		OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
			   "file", "kallsyms pathname"),
		OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
		OPT_END()
	};

	const struct option info_options[] = {
		OPT_BOOLEAN('t', "threads", &info_threads,
			    "dump the thread list in perf.data"),
		OPT_BOOLEAN('m', "map", &info_map,
			    "dump the map of lock instances (address:name table)"),
		OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
		OPT_STRING('k', "key", &sort_key, "acquired",
			   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
		OPT_STRING('F', "field", &output_fields, NULL,
			   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
		/* TODO: type */
		OPT_BOOLEAN('c', "combine-locks", &combine_locks,
			    "combine locks in the same class"),
		OPT_BOOLEAN('t', "threads", &show_thread_stats,
			    "show per-thread lock stats"),
		OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
		OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
		OPT_STRING('k', "key", &sort_key, "wait_total",
			   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
		OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
			   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
		OPT_BOOLEAN('t', "threads", &show_thread_stats,
			    "show per-thread lock stats"),
		OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
		OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
			    "System-wide collection from all CPUs"),
		OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
			   "List of cpus to monitor"),
		OPT_STRING('p', "pid", &target.pid, "pid",
			   "Trace on existing process id"),
		OPT_STRING(0, "tid", &target.tid, "tid",
			   "Trace on existing thread id (exclusive to --pid)"),
		OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
			     "Max number of BPF map entries", parse_map_entry),
		OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
			     "Set the maximum stack depth when collecting lock contention, "
			     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
		OPT_INTEGER(0, "stack-skip", &stack_skip,
			    "Set the number of stack depth to skip when finding a lock caller, "
			    "Default: " __stringify(CONTENTION_STACK_SKIP)),
		OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
		OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
		OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
			     "Filter specific type of locks", parse_lock_type),
		OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
			     "Filter specific address/symbol of locks", parse_lock_addr),
		OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
			     "Filter specific function in the callstack", parse_call_stack),
		OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
		OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
				   "print result in CSV format with custom separator"),
		OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
		OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
			     "Filter specific cgroups", parse_cgroup_filter),
		OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC",
			     "Inject delays to specific locks", parse_lock_delay),
		OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char * const lock_subcommands[] = { "record", "report", "script",
						  "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}

		/* If neither threads nor map requested, display both */
		if (!info_threads && !info_map) {
			info_threads = true;
			info_map = true;
		}

		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)lock_usage[0]);

	zfree(&lockhash_table);
	return rc;
}