xref: /linux/tools/perf/builtin-lock.c (revision a77ecea7ced2fef7cc0a8ad0323542f781ad9788)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/cgroup.h"
14 #include "util/callchain.h"
15 #include "util/lock-contention.h"
16 #include "util/bpf_skel/lock_data.h"
17 
18 #include <subcmd/pager.h>
19 #include <subcmd/parse-options.h>
20 #include "util/trace-event.h"
21 #include "util/tracepoint.h"
22 
23 #include "util/debug.h"
24 #include "util/event.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/data.h"
28 #include "util/string2.h"
29 #include "util/map.h"
30 #include "util/util.h"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/prctl.h>
35 #include <sys/wait.h>
36 #include <semaphore.h>
37 #include <math.h>
38 #include <limits.h>
39 #include <ctype.h>
40 
41 #include <linux/list.h>
42 #include <linux/hash.h>
43 #include <linux/kernel.h>
44 #include <linux/zalloc.h>
45 #include <linux/err.h>
46 #include <linux/stringify.h>
47 
48 static struct perf_session *session;
49 static struct target target;
50 
51 static struct rb_root		thread_stats;
52 
53 static bool combine_locks;
54 static bool show_thread_stats;
55 static bool show_lock_addrs;
56 static bool show_lock_owner;
57 static bool show_lock_cgroups;
58 static bool use_bpf;
59 static unsigned long bpf_map_entries = MAX_ENTRIES;
60 static int max_stack_depth = CONTENTION_STACK_DEPTH;
61 static int stack_skip = CONTENTION_STACK_SKIP;
62 static int print_nr_entries = INT_MAX / 2;
63 static const char *output_name = NULL;
64 static FILE *lock_output;
65 
66 static struct lock_filter filters;
67 static struct lock_delay *delays;
68 static int nr_delays;
69 
70 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
71 
72 static struct thread_stat *thread_stat_find(u32 tid)
73 {
74 	struct rb_node *node;
75 	struct thread_stat *st;
76 
77 	node = thread_stats.rb_node;
78 	while (node) {
79 		st = container_of(node, struct thread_stat, rb);
80 		if (st->tid == tid)
81 			return st;
82 		else if (tid < st->tid)
83 			node = node->rb_left;
84 		else
85 			node = node->rb_right;
86 	}
87 
88 	return NULL;
89 }
90 
91 static void thread_stat_insert(struct thread_stat *new)
92 {
93 	struct rb_node **rb = &thread_stats.rb_node;
94 	struct rb_node *parent = NULL;
95 	struct thread_stat *p;
96 
97 	while (*rb) {
98 		p = container_of(*rb, struct thread_stat, rb);
99 		parent = *rb;
100 
101 		if (new->tid < p->tid)
102 			rb = &(*rb)->rb_left;
103 		else if (new->tid > p->tid)
104 			rb = &(*rb)->rb_right;
105 		else
106 			BUG_ON("inserting invalid thread_stat\n");
107 	}
108 
109 	rb_link_node(&new->rb, parent, rb);
110 	rb_insert_color(&new->rb, &thread_stats);
111 }
112 
113 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
114 {
115 	struct thread_stat *st;
116 
117 	st = thread_stat_find(tid);
118 	if (st)
119 		return st;
120 
121 	st = zalloc(sizeof(struct thread_stat));
122 	if (!st) {
123 		pr_err("memory allocation failed\n");
124 		return NULL;
125 	}
126 
127 	st->tid = tid;
128 	INIT_LIST_HEAD(&st->seq_list);
129 
130 	thread_stat_insert(st);
131 
132 	return st;
133 }
134 
135 static struct thread_stat *thread_stat_findnew_first(u32 tid);
136 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
137 	thread_stat_findnew_first;
138 
139 static struct thread_stat *thread_stat_findnew_first(u32 tid)
140 {
141 	struct thread_stat *st;
142 
143 	st = zalloc(sizeof(struct thread_stat));
144 	if (!st) {
145 		pr_err("memory allocation failed\n");
146 		return NULL;
147 	}
148 	st->tid = tid;
149 	INIT_LIST_HEAD(&st->seq_list);
150 
151 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
152 	rb_insert_color(&st->rb, &thread_stats);
153 
154 	thread_stat_findnew = thread_stat_findnew_after_first;
155 	return st;
156 }
157 
158 /* build simple key function one is bigger than two */
159 #define SINGLE_KEY(member)						\
160 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
161 					 struct lock_stat *two)		\
162 	{								\
163 		return one->member > two->member;			\
164 	}
165 
166 SINGLE_KEY(nr_acquired)
167 SINGLE_KEY(nr_contended)
168 SINGLE_KEY(avg_wait_time)
169 SINGLE_KEY(wait_time_total)
170 SINGLE_KEY(wait_time_max)
171 
172 static int lock_stat_key_wait_time_min(struct lock_stat *one,
173 					struct lock_stat *two)
174 {
175 	u64 s1 = one->wait_time_min;
176 	u64 s2 = two->wait_time_min;
177 	if (s1 == ULLONG_MAX)
178 		s1 = 0;
179 	if (s2 == ULLONG_MAX)
180 		s2 = 0;
181 	return s1 > s2;
182 }
183 
184 struct lock_key {
185 	/*
186 	 * name: the value for specify by user
187 	 * this should be simpler than raw name of member
188 	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
189 	 */
190 	const char		*name;
191 	/* header: the string printed on the header line */
192 	const char		*header;
193 	/* len: the printing width of the field */
194 	int			len;
195 	/* key: a pointer to function to compare two lock stats for sorting */
196 	int			(*key)(struct lock_stat*, struct lock_stat*);
197 	/* print: a pointer to function to print a given lock stats */
198 	void			(*print)(struct lock_key*, struct lock_stat*);
199 	/* list: list entry to link this */
200 	struct list_head	list;
201 };
202 
203 static void lock_stat_key_print_time(unsigned long long nsec, int len)
204 {
205 	static const struct {
206 		float base;
207 		const char *unit;
208 	} table[] = {
209 		{ 1e9 * 3600, "h " },
210 		{ 1e9 * 60, "m " },
211 		{ 1e9, "s " },
212 		{ 1e6, "ms" },
213 		{ 1e3, "us" },
214 		{ 0, NULL },
215 	};
216 
217 	/* for CSV output */
218 	if (len == 0) {
219 		fprintf(lock_output, "%llu", nsec);
220 		return;
221 	}
222 
223 	for (int i = 0; table[i].unit; i++) {
224 		if (nsec < table[i].base)
225 			continue;
226 
227 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
228 		return;
229 	}
230 
231 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
232 }
233 
234 #define PRINT_KEY(member)						\
235 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
236 					   struct lock_stat *ls)	\
237 {									\
238 	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
239 }
240 
241 #define PRINT_TIME(member)						\
242 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
243 					   struct lock_stat *ls)	\
244 {									\
245 	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
246 }
247 
248 PRINT_KEY(nr_acquired)
249 PRINT_KEY(nr_contended)
250 PRINT_TIME(avg_wait_time)
251 PRINT_TIME(wait_time_total)
252 PRINT_TIME(wait_time_max)
253 
254 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
255 					      struct lock_stat *ls)
256 {
257 	u64 wait_time = ls->wait_time_min;
258 
259 	if (wait_time == ULLONG_MAX)
260 		wait_time = 0;
261 
262 	lock_stat_key_print_time(wait_time, key->len);
263 }
264 
265 
266 static const char		*sort_key = "acquired";
267 
268 static int			(*compare)(struct lock_stat *, struct lock_stat *);
269 
270 static struct rb_root		sorted; /* place to store intermediate data */
271 static struct rb_root		result;	/* place to store sorted data */
272 
273 static LIST_HEAD(lock_keys);
274 static const char		*output_fields;
275 
276 #define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
277 	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
278 static struct lock_key report_keys[] = {
279 	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
280 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
281 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
282 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
283 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
284 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
285 
286 	/* extra comparisons much complicated should be here */
287 	{ }
288 };
289 
290 static struct lock_key contention_keys[] = {
291 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
292 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
293 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
294 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
295 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
296 
297 	/* extra comparisons much complicated should be here */
298 	{ }
299 };
300 
301 static int select_key(bool contention)
302 {
303 	int i;
304 	struct lock_key *keys = report_keys;
305 
306 	if (contention)
307 		keys = contention_keys;
308 
309 	for (i = 0; keys[i].name; i++) {
310 		if (!strcmp(keys[i].name, sort_key)) {
311 			compare = keys[i].key;
312 
313 			/* selected key should be in the output fields */
314 			if (list_empty(&keys[i].list))
315 				list_add_tail(&keys[i].list, &lock_keys);
316 
317 			return 0;
318 		}
319 	}
320 
321 	pr_err("Unknown compare key: %s\n", sort_key);
322 	return -1;
323 }
324 
325 static int add_output_field(bool contention, char *name)
326 {
327 	int i;
328 	struct lock_key *keys = report_keys;
329 
330 	if (contention)
331 		keys = contention_keys;
332 
333 	for (i = 0; keys[i].name; i++) {
334 		if (strcmp(keys[i].name, name))
335 			continue;
336 
337 		/* prevent double link */
338 		if (list_empty(&keys[i].list))
339 			list_add_tail(&keys[i].list, &lock_keys);
340 
341 		return 0;
342 	}
343 
344 	pr_err("Unknown output field: %s\n", name);
345 	return -1;
346 }
347 
348 static int setup_output_field(bool contention, const char *str)
349 {
350 	char *tok, *tmp, *orig;
351 	int i, ret = 0;
352 	struct lock_key *keys = report_keys;
353 
354 	if (contention)
355 		keys = contention_keys;
356 
357 	/* no output field given: use all of them */
358 	if (str == NULL) {
359 		for (i = 0; keys[i].name; i++)
360 			list_add_tail(&keys[i].list, &lock_keys);
361 		return 0;
362 	}
363 
364 	for (i = 0; keys[i].name; i++)
365 		INIT_LIST_HEAD(&keys[i].list);
366 
367 	orig = tmp = strdup(str);
368 	if (orig == NULL)
369 		return -ENOMEM;
370 
371 	while ((tok = strsep(&tmp, ",")) != NULL){
372 		ret = add_output_field(contention, tok);
373 		if (ret < 0)
374 			break;
375 	}
376 	free(orig);
377 
378 	return ret;
379 }
380 
381 static void combine_lock_stats(struct lock_stat *st)
382 {
383 	struct rb_node **rb = &sorted.rb_node;
384 	struct rb_node *parent = NULL;
385 	struct lock_stat *p;
386 	int ret;
387 
388 	while (*rb) {
389 		p = container_of(*rb, struct lock_stat, rb);
390 		parent = *rb;
391 
392 		if (st->name && p->name)
393 			ret = strcmp(st->name, p->name);
394 		else
395 			ret = !!st->name - !!p->name;
396 
397 		if (ret == 0) {
398 			p->nr_acquired += st->nr_acquired;
399 			p->nr_contended += st->nr_contended;
400 			p->wait_time_total += st->wait_time_total;
401 
402 			if (p->nr_contended)
403 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
404 
405 			if (p->wait_time_min > st->wait_time_min)
406 				p->wait_time_min = st->wait_time_min;
407 			if (p->wait_time_max < st->wait_time_max)
408 				p->wait_time_max = st->wait_time_max;
409 
410 			p->broken |= st->broken;
411 			st->combined = 1;
412 			return;
413 		}
414 
415 		if (ret < 0)
416 			rb = &(*rb)->rb_left;
417 		else
418 			rb = &(*rb)->rb_right;
419 	}
420 
421 	rb_link_node(&st->rb, parent, rb);
422 	rb_insert_color(&st->rb, &sorted);
423 }
424 
425 static void insert_to(struct rb_root *rr, struct lock_stat *st,
426 		      int (*bigger)(struct lock_stat *, struct lock_stat *))
427 {
428 	struct rb_node **rb = &rr->rb_node;
429 	struct rb_node *parent = NULL;
430 	struct lock_stat *p;
431 
432 	while (*rb) {
433 		p = container_of(*rb, struct lock_stat, rb);
434 		parent = *rb;
435 
436 		if (bigger(st, p))
437 			rb = &(*rb)->rb_left;
438 		else
439 			rb = &(*rb)->rb_right;
440 	}
441 
442 	rb_link_node(&st->rb, parent, rb);
443 	rb_insert_color(&st->rb, rr);
444 }
445 
446 static inline void insert_to_result(struct lock_stat *st,
447 				    int (*bigger)(struct lock_stat *,
448 						  struct lock_stat *))
449 {
450 	if (combine_locks && st->combined)
451 		return;
452 	insert_to(&result, st, bigger);
453 }
454 
455 static inline struct lock_stat *pop_from(struct rb_root *rr)
456 {
457 	struct rb_node *node = rr->rb_node;
458 
459 	if (!node)
460 		return NULL;
461 
462 	while (node->rb_left)
463 		node = node->rb_left;
464 
465 	rb_erase(node, rr);
466 	return container_of(node, struct lock_stat, rb);
467 
468 }
469 
470 /* returns left most element of result, and erase it */
471 static struct lock_stat *pop_from_result(void)
472 {
473 	return pop_from(&result);
474 }
475 
476 struct trace_lock_handler {
477 	/* it's used on CONFIG_LOCKDEP */
478 	int (*acquire_event)(struct perf_sample *sample);
479 
480 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
481 	int (*acquired_event)(struct perf_sample *sample);
482 
483 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
484 	int (*contended_event)(struct perf_sample *sample);
485 
486 	/* it's used on CONFIG_LOCKDEP */
487 	int (*release_event)(struct perf_sample *sample);
488 
489 	/* it's used when CONFIG_LOCKDEP is off */
490 	int (*contention_begin_event)(struct perf_sample *sample);
491 
492 	/* it's used when CONFIG_LOCKDEP is off */
493 	int (*contention_end_event)(struct perf_sample *sample);
494 };
495 
496 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
497 {
498 	struct lock_seq_stat *seq;
499 
500 	list_for_each_entry(seq, &ts->seq_list, list) {
501 		if (seq->addr == addr)
502 			return seq;
503 	}
504 
505 	seq = zalloc(sizeof(struct lock_seq_stat));
506 	if (!seq) {
507 		pr_err("memory allocation failed\n");
508 		return NULL;
509 	}
510 	seq->state = SEQ_STATE_UNINITIALIZED;
511 	seq->addr = addr;
512 
513 	list_add(&seq->list, &ts->seq_list);
514 	return seq;
515 }
516 
517 enum broken_state {
518 	BROKEN_ACQUIRE,
519 	BROKEN_ACQUIRED,
520 	BROKEN_CONTENDED,
521 	BROKEN_RELEASE,
522 	BROKEN_MAX,
523 };
524 
525 static int bad_hist[BROKEN_MAX];
526 
527 enum acquire_flags {
528 	TRY_LOCK = 1,
529 	READ_LOCK = 2,
530 };
531 
532 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
533 {
534 	switch (aggr_mode) {
535 	case LOCK_AGGR_ADDR:
536 		*key = addr;
537 		break;
538 	case LOCK_AGGR_TASK:
539 		*key = tid;
540 		break;
541 	case LOCK_AGGR_CALLER:
542 	case LOCK_AGGR_CGROUP:
543 	default:
544 		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
545 		return -EINVAL;
546 	}
547 	return 0;
548 }
549 
550 static u64 callchain_id(struct perf_sample *sample);
551 
552 static int get_key_by_aggr_mode(u64 *key, u64 addr,
553 				 struct perf_sample *sample)
554 {
555 	if (aggr_mode == LOCK_AGGR_CALLER) {
556 		*key = callchain_id(sample);
557 		return 0;
558 	}
559 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
560 }
561 
562 static int report_lock_acquire_event(struct perf_sample *sample)
563 {
564 	struct lock_stat *ls;
565 	struct thread_stat *ts;
566 	struct lock_seq_stat *seq;
567 	const char *name = perf_sample__strval(sample, "name");
568 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
569 	int flag = perf_sample__intval(sample, "flags");
570 	u64 key;
571 	int ret;
572 
573 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
574 	if (ret < 0)
575 		return ret;
576 
577 	ls = lock_stat_findnew(key, name, 0);
578 	if (!ls)
579 		return -ENOMEM;
580 
581 	ts = thread_stat_findnew(sample->tid);
582 	if (!ts)
583 		return -ENOMEM;
584 
585 	seq = get_seq(ts, addr);
586 	if (!seq)
587 		return -ENOMEM;
588 
589 	switch (seq->state) {
590 	case SEQ_STATE_UNINITIALIZED:
591 	case SEQ_STATE_RELEASED:
592 		if (!flag) {
593 			seq->state = SEQ_STATE_ACQUIRING;
594 		} else {
595 			if (flag & TRY_LOCK)
596 				ls->nr_trylock++;
597 			if (flag & READ_LOCK)
598 				ls->nr_readlock++;
599 			seq->state = SEQ_STATE_READ_ACQUIRED;
600 			seq->read_count = 1;
601 			ls->nr_acquired++;
602 		}
603 		break;
604 	case SEQ_STATE_READ_ACQUIRED:
605 		if (flag & READ_LOCK) {
606 			seq->read_count++;
607 			ls->nr_acquired++;
608 			goto end;
609 		} else {
610 			goto broken;
611 		}
612 		break;
613 	case SEQ_STATE_ACQUIRED:
614 	case SEQ_STATE_ACQUIRING:
615 	case SEQ_STATE_CONTENDED:
616 broken:
617 		/* broken lock sequence */
618 		if (!ls->broken) {
619 			ls->broken = 1;
620 			bad_hist[BROKEN_ACQUIRE]++;
621 		}
622 		list_del_init(&seq->list);
623 		free(seq);
624 		goto end;
625 	default:
626 		BUG_ON("Unknown state of lock sequence found!\n");
627 		break;
628 	}
629 
630 	ls->nr_acquire++;
631 	seq->prev_event_time = sample->time;
632 end:
633 	return 0;
634 }
635 
636 static int report_lock_acquired_event(struct perf_sample *sample)
637 {
638 	struct lock_stat *ls;
639 	struct thread_stat *ts;
640 	struct lock_seq_stat *seq;
641 	u64 contended_term;
642 	const char *name = perf_sample__strval(sample, "name");
643 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
644 	u64 key;
645 	int ret;
646 
647 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
648 	if (ret < 0)
649 		return ret;
650 
651 	ls = lock_stat_findnew(key, name, 0);
652 	if (!ls)
653 		return -ENOMEM;
654 
655 	ts = thread_stat_findnew(sample->tid);
656 	if (!ts)
657 		return -ENOMEM;
658 
659 	seq = get_seq(ts, addr);
660 	if (!seq)
661 		return -ENOMEM;
662 
663 	switch (seq->state) {
664 	case SEQ_STATE_UNINITIALIZED:
665 		/* orphan event, do nothing */
666 		return 0;
667 	case SEQ_STATE_ACQUIRING:
668 		break;
669 	case SEQ_STATE_CONTENDED:
670 		contended_term = sample->time - seq->prev_event_time;
671 		ls->wait_time_total += contended_term;
672 		if (contended_term < ls->wait_time_min)
673 			ls->wait_time_min = contended_term;
674 		if (ls->wait_time_max < contended_term)
675 			ls->wait_time_max = contended_term;
676 		break;
677 	case SEQ_STATE_RELEASED:
678 	case SEQ_STATE_ACQUIRED:
679 	case SEQ_STATE_READ_ACQUIRED:
680 		/* broken lock sequence */
681 		if (!ls->broken) {
682 			ls->broken = 1;
683 			bad_hist[BROKEN_ACQUIRED]++;
684 		}
685 		list_del_init(&seq->list);
686 		free(seq);
687 		goto end;
688 	default:
689 		BUG_ON("Unknown state of lock sequence found!\n");
690 		break;
691 	}
692 
693 	seq->state = SEQ_STATE_ACQUIRED;
694 	ls->nr_acquired++;
695 	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
696 	seq->prev_event_time = sample->time;
697 end:
698 	return 0;
699 }
700 
701 static int report_lock_contended_event(struct perf_sample *sample)
702 {
703 	struct lock_stat *ls;
704 	struct thread_stat *ts;
705 	struct lock_seq_stat *seq;
706 	const char *name = perf_sample__strval(sample, "name");
707 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
708 	u64 key;
709 	int ret;
710 
711 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
712 	if (ret < 0)
713 		return ret;
714 
715 	ls = lock_stat_findnew(key, name, 0);
716 	if (!ls)
717 		return -ENOMEM;
718 
719 	ts = thread_stat_findnew(sample->tid);
720 	if (!ts)
721 		return -ENOMEM;
722 
723 	seq = get_seq(ts, addr);
724 	if (!seq)
725 		return -ENOMEM;
726 
727 	switch (seq->state) {
728 	case SEQ_STATE_UNINITIALIZED:
729 		/* orphan event, do nothing */
730 		return 0;
731 	case SEQ_STATE_ACQUIRING:
732 		break;
733 	case SEQ_STATE_RELEASED:
734 	case SEQ_STATE_ACQUIRED:
735 	case SEQ_STATE_READ_ACQUIRED:
736 	case SEQ_STATE_CONTENDED:
737 		/* broken lock sequence */
738 		if (!ls->broken) {
739 			ls->broken = 1;
740 			bad_hist[BROKEN_CONTENDED]++;
741 		}
742 		list_del_init(&seq->list);
743 		free(seq);
744 		goto end;
745 	default:
746 		BUG_ON("Unknown state of lock sequence found!\n");
747 		break;
748 	}
749 
750 	seq->state = SEQ_STATE_CONTENDED;
751 	ls->nr_contended++;
752 	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
753 	seq->prev_event_time = sample->time;
754 end:
755 	return 0;
756 }
757 
758 static int report_lock_release_event(struct perf_sample *sample)
759 {
760 	struct lock_stat *ls;
761 	struct thread_stat *ts;
762 	struct lock_seq_stat *seq;
763 	const char *name = perf_sample__strval(sample, "name");
764 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
765 	u64 key;
766 	int ret;
767 
768 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
769 	if (ret < 0)
770 		return ret;
771 
772 	ls = lock_stat_findnew(key, name, 0);
773 	if (!ls)
774 		return -ENOMEM;
775 
776 	ts = thread_stat_findnew(sample->tid);
777 	if (!ts)
778 		return -ENOMEM;
779 
780 	seq = get_seq(ts, addr);
781 	if (!seq)
782 		return -ENOMEM;
783 
784 	switch (seq->state) {
785 	case SEQ_STATE_UNINITIALIZED:
786 		goto end;
787 	case SEQ_STATE_ACQUIRED:
788 		break;
789 	case SEQ_STATE_READ_ACQUIRED:
790 		seq->read_count--;
791 		BUG_ON(seq->read_count < 0);
792 		if (seq->read_count) {
793 			ls->nr_release++;
794 			goto end;
795 		}
796 		break;
797 	case SEQ_STATE_ACQUIRING:
798 	case SEQ_STATE_CONTENDED:
799 	case SEQ_STATE_RELEASED:
800 		/* broken lock sequence */
801 		if (!ls->broken) {
802 			ls->broken = 1;
803 			bad_hist[BROKEN_RELEASE]++;
804 		}
805 		goto free_seq;
806 	default:
807 		BUG_ON("Unknown state of lock sequence found!\n");
808 		break;
809 	}
810 
811 	ls->nr_release++;
812 free_seq:
813 	list_del_init(&seq->list);
814 	free(seq);
815 end:
816 	return 0;
817 }
818 
819 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
820 				  char *buf, int size)
821 {
822 	u64 offset;
823 
824 	if (map == NULL || sym == NULL) {
825 		buf[0] = '\0';
826 		return 0;
827 	}
828 
829 	offset = map__map_ip(map, ip) - sym->start;
830 
831 	if (offset)
832 		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
833 	else
834 		return strlcpy(buf, sym->name, size);
835 }
836 static int lock_contention_caller(struct perf_sample *sample,
837 				  char *buf, int size)
838 {
839 	struct thread *thread;
840 	struct callchain_cursor *cursor;
841 	struct machine *machine = &session->machines.host;
842 	struct symbol *sym;
843 	int skip = 0;
844 	int ret;
845 
846 	/* lock names will be replaced to task name later */
847 	if (show_thread_stats)
848 		return -1;
849 
850 	thread = machine__findnew_thread(machine, -1, sample->pid);
851 	if (thread == NULL)
852 		return -1;
853 
854 	cursor = get_tls_callchain_cursor();
855 
856 	/* use caller function name from the callchain */
857 	ret = thread__resolve_callchain(thread, cursor, sample,
858 					NULL, NULL, max_stack_depth);
859 	if (ret != 0) {
860 		thread__put(thread);
861 		return -1;
862 	}
863 
864 	callchain_cursor_commit(cursor);
865 	thread__put(thread);
866 
867 	while (true) {
868 		struct callchain_cursor_node *node;
869 
870 		node = callchain_cursor_current(cursor);
871 		if (node == NULL)
872 			break;
873 
874 		/* skip first few entries - for lock functions */
875 		if (++skip <= stack_skip)
876 			goto next;
877 
878 		sym = node->ms.sym;
879 		if (sym && !machine__is_lock_function(machine, node->ip)) {
880 			get_symbol_name_offset(node->ms.map, sym, node->ip,
881 					       buf, size);
882 			return 0;
883 		}
884 
885 next:
886 		callchain_cursor_advance(cursor);
887 	}
888 	return -1;
889 }
890 
891 static u64 callchain_id(struct perf_sample *sample)
892 {
893 	struct callchain_cursor *cursor;
894 	struct machine *machine = &session->machines.host;
895 	struct thread *thread;
896 	u64 hash = 0;
897 	int skip = 0;
898 	int ret;
899 
900 	thread = machine__findnew_thread(machine, -1, sample->pid);
901 	if (thread == NULL)
902 		return -1;
903 
904 	cursor = get_tls_callchain_cursor();
905 	/* use caller function name from the callchain */
906 	ret = thread__resolve_callchain(thread, cursor, sample,
907 					NULL, NULL, max_stack_depth);
908 	thread__put(thread);
909 
910 	if (ret != 0)
911 		return -1;
912 
913 	callchain_cursor_commit(cursor);
914 
915 	while (true) {
916 		struct callchain_cursor_node *node;
917 
918 		node = callchain_cursor_current(cursor);
919 		if (node == NULL)
920 			break;
921 
922 		/* skip first few entries - for lock functions */
923 		if (++skip <= stack_skip)
924 			goto next;
925 
926 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
927 			goto next;
928 
929 		hash ^= hash_long((unsigned long)node->ip, 64);
930 
931 next:
932 		callchain_cursor_advance(cursor);
933 	}
934 	return hash;
935 }
936 
937 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
938 {
939 	u64 *callstack;
940 	u64 i;
941 	int c;
942 
943 	if (!sample->callchain) {
944 		pr_debug("Sample unexpectedly missing callchain\n");
945 		return NULL;
946 	}
947 
948 	callstack = calloc(max_stack, sizeof(*callstack));
949 	if (callstack == NULL) {
950 		pr_debug("Failed to allocate callstack\n");
951 		return NULL;
952 	}
953 
954 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
955 		u64 ip = sample->callchain->ips[i];
956 
957 		if (ip >= PERF_CONTEXT_MAX)
958 			continue;
959 
960 		callstack[c++] = ip;
961 	}
962 	return callstack;
963 }
964 
965 static int report_lock_contention_begin_event(struct perf_sample *sample)
966 {
967 	struct lock_stat *ls;
968 	struct thread_stat *ts;
969 	struct lock_seq_stat *seq;
970 	u64 addr = perf_sample__intval(sample, "lock_addr");
971 	unsigned int flags = perf_sample__intval(sample, "flags");
972 	u64 key;
973 	int i, ret;
974 	static bool kmap_loaded;
975 	struct machine *machine = &session->machines.host;
976 	struct map *kmap;
977 	struct symbol *sym;
978 
979 	ret = get_key_by_aggr_mode(&key, addr, sample);
980 	if (ret < 0)
981 		return ret;
982 
983 	if (!kmap_loaded) {
984 		unsigned long *addrs;
985 
986 		/* make sure it loads the kernel map to find lock symbols */
987 		map__load(machine__kernel_map(machine));
988 		kmap_loaded = true;
989 
990 		/* convert (kernel) symbols to addresses */
991 		for (i = 0; i < filters.nr_syms; i++) {
992 			sym = machine__find_kernel_symbol_by_name(machine,
993 								  filters.syms[i],
994 								  &kmap);
995 			if (sym == NULL) {
996 				pr_warning("ignore unknown symbol: %s\n",
997 					   filters.syms[i]);
998 				continue;
999 			}
1000 
1001 			addrs = realloc(filters.addrs,
1002 					(filters.nr_addrs + 1) * sizeof(*addrs));
1003 			if (addrs == NULL) {
1004 				pr_warning("memory allocation failure\n");
1005 				return -ENOMEM;
1006 			}
1007 
1008 			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
1009 			filters.addrs = addrs;
1010 		}
1011 	}
1012 
1013 	ls = lock_stat_find(key);
1014 	if (!ls) {
1015 		char buf[128];
1016 		const char *name = "";
1017 
1018 		switch (aggr_mode) {
1019 		case LOCK_AGGR_ADDR:
1020 			sym = machine__find_kernel_symbol(machine, key, &kmap);
1021 			if (sym)
1022 				name = sym->name;
1023 			break;
1024 		case LOCK_AGGR_CALLER:
1025 			name = buf;
1026 			if (lock_contention_caller(sample, buf, sizeof(buf)) < 0)
1027 				name = "Unknown";
1028 			break;
1029 		case LOCK_AGGR_CGROUP:
1030 		case LOCK_AGGR_TASK:
1031 		default:
1032 			break;
1033 		}
1034 
1035 		ls = lock_stat_findnew(key, name, flags);
1036 		if (!ls)
1037 			return -ENOMEM;
1038 	}
1039 
1040 	if (filters.nr_types) {
1041 		bool found = false;
1042 
1043 		for (i = 0; i < filters.nr_types; i++) {
1044 			if (flags == filters.types[i]) {
1045 				found = true;
1046 				break;
1047 			}
1048 		}
1049 
1050 		if (!found)
1051 			return 0;
1052 	}
1053 
1054 	if (filters.nr_addrs) {
1055 		bool found = false;
1056 
1057 		for (i = 0; i < filters.nr_addrs; i++) {
1058 			if (addr == filters.addrs[i]) {
1059 				found = true;
1060 				break;
1061 			}
1062 		}
1063 
1064 		if (!found)
1065 			return 0;
1066 	}
1067 
1068 	if (needs_callstack()) {
1069 		u64 *callstack = get_callstack(sample, max_stack_depth);
1070 		if (callstack == NULL)
1071 			return 0;
1072 
1073 		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
1074 			free(callstack);
1075 			return 0;
1076 		}
1077 
1078 		if (ls->callstack == NULL)
1079 			ls->callstack = callstack;
1080 		else
1081 			free(callstack);
1082 	}
1083 
1084 	ts = thread_stat_findnew(sample->tid);
1085 	if (!ts)
1086 		return -ENOMEM;
1087 
1088 	seq = get_seq(ts, addr);
1089 	if (!seq)
1090 		return -ENOMEM;
1091 
1092 	switch (seq->state) {
1093 	case SEQ_STATE_UNINITIALIZED:
1094 	case SEQ_STATE_ACQUIRED:
1095 		break;
1096 	case SEQ_STATE_CONTENDED:
1097 		/*
1098 		 * It can have nested contention begin with mutex spinning,
1099 		 * then we would use the original contention begin event and
1100 		 * ignore the second one.
1101 		 */
1102 		goto end;
1103 	case SEQ_STATE_ACQUIRING:
1104 	case SEQ_STATE_READ_ACQUIRED:
1105 	case SEQ_STATE_RELEASED:
1106 		/* broken lock sequence */
1107 		if (!ls->broken) {
1108 			ls->broken = 1;
1109 			bad_hist[BROKEN_CONTENDED]++;
1110 		}
1111 		list_del_init(&seq->list);
1112 		free(seq);
1113 		goto end;
1114 	default:
1115 		BUG_ON("Unknown state of lock sequence found!\n");
1116 		break;
1117 	}
1118 
1119 	if (seq->state != SEQ_STATE_CONTENDED) {
1120 		seq->state = SEQ_STATE_CONTENDED;
1121 		seq->prev_event_time = sample->time;
1122 		ls->nr_contended++;
1123 	}
1124 end:
1125 	return 0;
1126 }
1127 
1128 static int report_lock_contention_end_event(struct perf_sample *sample)
1129 {
1130 	struct lock_stat *ls;
1131 	struct thread_stat *ts;
1132 	struct lock_seq_stat *seq;
1133 	u64 contended_term;
1134 	u64 addr = perf_sample__intval(sample, "lock_addr");
1135 	u64 key;
1136 	int ret;
1137 
1138 	ret = get_key_by_aggr_mode(&key, addr, sample);
1139 	if (ret < 0)
1140 		return ret;
1141 
1142 	ls = lock_stat_find(key);
1143 	if (!ls)
1144 		return 0;
1145 
1146 	ts = thread_stat_find(sample->tid);
1147 	if (!ts)
1148 		return 0;
1149 
1150 	seq = get_seq(ts, addr);
1151 	if (!seq)
1152 		return -ENOMEM;
1153 
1154 	switch (seq->state) {
1155 	case SEQ_STATE_UNINITIALIZED:
1156 		goto end;
1157 	case SEQ_STATE_CONTENDED:
1158 		contended_term = sample->time - seq->prev_event_time;
1159 		ls->wait_time_total += contended_term;
1160 		if (contended_term < ls->wait_time_min)
1161 			ls->wait_time_min = contended_term;
1162 		if (ls->wait_time_max < contended_term)
1163 			ls->wait_time_max = contended_term;
1164 		break;
1165 	case SEQ_STATE_ACQUIRING:
1166 	case SEQ_STATE_ACQUIRED:
1167 	case SEQ_STATE_READ_ACQUIRED:
1168 	case SEQ_STATE_RELEASED:
1169 		/* broken lock sequence */
1170 		if (!ls->broken) {
1171 			ls->broken = 1;
1172 			bad_hist[BROKEN_ACQUIRED]++;
1173 		}
1174 		list_del_init(&seq->list);
1175 		free(seq);
1176 		goto end;
1177 	default:
1178 		BUG_ON("Unknown state of lock sequence found!\n");
1179 		break;
1180 	}
1181 
1182 	seq->state = SEQ_STATE_ACQUIRED;
1183 	ls->nr_acquired++;
1184 	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1185 end:
1186 	return 0;
1187 }
1188 
1189 /* lock oriented handlers */
1190 /* TODO: handlers for CPU oriented, thread oriented */
1191 static const struct trace_lock_handler report_lock_ops  = {
1192 	.acquire_event		= report_lock_acquire_event,
1193 	.acquired_event		= report_lock_acquired_event,
1194 	.contended_event	= report_lock_contended_event,
1195 	.release_event		= report_lock_release_event,
1196 	.contention_begin_event	= report_lock_contention_begin_event,
1197 	.contention_end_event	= report_lock_contention_end_event,
1198 };
1199 
1200 static const struct trace_lock_handler contention_lock_ops  = {
1201 	.contention_begin_event	= report_lock_contention_begin_event,
1202 	.contention_end_event	= report_lock_contention_end_event,
1203 };
1204 
1205 
1206 static const struct trace_lock_handler *trace_handler;
1207 
1208 static int evsel__process_lock_acquire(struct perf_sample *sample)
1209 {
1210 	if (trace_handler->acquire_event)
1211 		return trace_handler->acquire_event(sample);
1212 	return 0;
1213 }
1214 
1215 static int evsel__process_lock_acquired(struct perf_sample *sample)
1216 {
1217 	if (trace_handler->acquired_event)
1218 		return trace_handler->acquired_event(sample);
1219 	return 0;
1220 }
1221 
1222 static int evsel__process_lock_contended(struct perf_sample *sample)
1223 {
1224 	if (trace_handler->contended_event)
1225 		return trace_handler->contended_event(sample);
1226 	return 0;
1227 }
1228 
1229 static int evsel__process_lock_release(struct perf_sample *sample)
1230 {
1231 	if (trace_handler->release_event)
1232 		return trace_handler->release_event(sample);
1233 	return 0;
1234 }
1235 
1236 static int evsel__process_contention_begin(struct perf_sample *sample)
1237 {
1238 	if (trace_handler->contention_begin_event)
1239 		return trace_handler->contention_begin_event(sample);
1240 	return 0;
1241 }
1242 
1243 static int evsel__process_contention_end(struct perf_sample *sample)
1244 {
1245 	if (trace_handler->contention_end_event)
1246 		return trace_handler->contention_end_event(sample);
1247 	return 0;
1248 }
1249 
1250 static void print_bad_events(int bad, int total)
1251 {
1252 	/* Output for debug, this have to be removed */
1253 	int i;
1254 	int broken = 0;
1255 	const char *name[4] =
1256 		{ "acquire", "acquired", "contended", "release" };
1257 
1258 	for (i = 0; i < BROKEN_MAX; i++)
1259 		broken += bad_hist[i];
1260 
1261 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1262 		return;
1263 
1264 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1265 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1266 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1267 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1268 	for (i = 0; i < BROKEN_MAX; i++)
1269 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1270 }
1271 
1272 /* TODO: various way to print, coloring, nano or milli sec */
1273 static void print_result(void)
1274 {
1275 	struct lock_stat *st;
1276 	struct lock_key *key;
1277 	char cut_name[20];
1278 	int bad, total, printed;
1279 
1280 	if (!quiet) {
1281 		fprintf(lock_output, "%20s ", "Name");
1282 		list_for_each_entry(key, &lock_keys, list)
1283 			fprintf(lock_output, "%*s ", key->len, key->header);
1284 		fprintf(lock_output, "\n\n");
1285 	}
1286 
1287 	bad = total = printed = 0;
1288 	while ((st = pop_from_result())) {
1289 		total++;
1290 		if (st->broken)
1291 			bad++;
1292 		if (!st->nr_acquired)
1293 			continue;
1294 
1295 		bzero(cut_name, 20);
1296 
1297 		if (strlen(st->name) < 20) {
1298 			/* output raw name */
1299 			const char *name = st->name;
1300 
1301 			if (show_thread_stats) {
1302 				struct thread *t;
1303 
1304 				/* st->addr contains tid of thread */
1305 				t = perf_session__findnew(session, st->addr);
1306 				name = thread__comm_str(t);
1307 			}
1308 
1309 			fprintf(lock_output, "%20s ", name);
1310 		} else {
1311 			strncpy(cut_name, st->name, 16);
1312 			cut_name[16] = '.';
1313 			cut_name[17] = '.';
1314 			cut_name[18] = '.';
1315 			cut_name[19] = '\0';
1316 			/* cut off name for saving output style */
1317 			fprintf(lock_output, "%20s ", cut_name);
1318 		}
1319 
1320 		list_for_each_entry(key, &lock_keys, list) {
1321 			key->print(key, st);
1322 			fprintf(lock_output, " ");
1323 		}
1324 		fprintf(lock_output, "\n");
1325 
1326 		if (++printed >= print_nr_entries)
1327 			break;
1328 	}
1329 
1330 	print_bad_events(bad, total);
1331 }
1332 
1333 static bool info_threads, info_map;
1334 
1335 static void dump_threads(void)
1336 {
1337 	struct thread_stat *st;
1338 	struct rb_node *node;
1339 	struct thread *t;
1340 
1341 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1342 
1343 	node = rb_first(&thread_stats);
1344 	while (node) {
1345 		st = container_of(node, struct thread_stat, rb);
1346 		t = perf_session__findnew(session, st->tid);
1347 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1348 		node = rb_next(node);
1349 		thread__put(t);
1350 	}
1351 }
1352 
1353 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1354 {
1355 	int ret;
1356 
1357 	if (a->name && b->name)
1358 		ret = strcmp(a->name, b->name);
1359 	else
1360 		ret = !!a->name - !!b->name;
1361 
1362 	if (!ret)
1363 		return a->addr < b->addr;
1364 	else
1365 		return ret < 0;
1366 }
1367 
1368 static void dump_map(void)
1369 {
1370 	unsigned int i;
1371 	struct lock_stat *st;
1372 
1373 	fprintf(lock_output, "Address of instance: name of class\n");
1374 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1375 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1376 			insert_to_result(st, compare_maps);
1377 		}
1378 	}
1379 
1380 	while ((st = pop_from_result()))
1381 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1382 }
1383 
1384 static void dump_info(void)
1385 {
1386 	if (info_threads)
1387 		dump_threads();
1388 
1389 	if (info_map) {
1390 		if (info_threads)
1391 			fputc('\n', lock_output);
1392 		dump_map();
1393 	}
1394 }
1395 
1396 static const struct evsel_str_handler lock_tracepoints[] = {
1397 	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
1398 	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1399 	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1400 	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
1401 };
1402 
1403 static const struct evsel_str_handler contention_tracepoints[] = {
1404 	{ "lock:contention_begin", evsel__process_contention_begin, },
1405 	{ "lock:contention_end",   evsel__process_contention_end,   },
1406 };
1407 
1408 static int process_event_update(const struct perf_tool *tool,
1409 				union perf_event *event,
1410 				struct evlist **pevlist)
1411 {
1412 	int ret;
1413 
1414 	ret = perf_event__process_event_update(tool, event, pevlist);
1415 	if (ret < 0)
1416 		return ret;
1417 
1418 	/* this can return -EEXIST since we call it for each evsel */
1419 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1420 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1421 	return 0;
1422 }
1423 
1424 typedef int (*tracepoint_handler)(struct perf_sample *sample);
1425 
1426 static int process_sample_event(const struct perf_tool *tool __maybe_unused,
1427 				union perf_event *event,
1428 				struct perf_sample *sample,
1429 				struct machine *machine)
1430 {
1431 	struct evsel *evsel = sample->evsel;
1432 	int err = 0;
1433 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1434 							sample->tid);
1435 
1436 	if (thread == NULL) {
1437 		pr_debug("problem processing %s (%u) event at offset %#" PRIx64 ", skipping it.\n",
1438 			 perf_event__name(event->header.type), event->header.type,
1439 			 sample->file_offset);
1440 		return -1;
1441 	}
1442 
1443 	if (evsel->handler != NULL) {
1444 		tracepoint_handler f = evsel->handler;
1445 		err = f(sample);
1446 	}
1447 
1448 	thread__put(thread);
1449 
1450 	return err;
1451 }
1452 
1453 static void combine_result(void)
1454 {
1455 	unsigned int i;
1456 	struct lock_stat *st;
1457 
1458 	if (!combine_locks)
1459 		return;
1460 
1461 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1462 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1463 			combine_lock_stats(st);
1464 		}
1465 	}
1466 }
1467 
1468 static void sort_result(void)
1469 {
1470 	unsigned int i;
1471 	struct lock_stat *st;
1472 
1473 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1474 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1475 			insert_to_result(st, compare);
1476 		}
1477 	}
1478 }
1479 
1480 static const struct {
1481 	unsigned int flags;
1482 	/*
1483 	 * Name of the lock flags (access), with delimeter ':'.
1484 	 * For example, rwsem:R of rwsem:W.
1485 	 */
1486 	const char *flags_name;
1487 	/* Name of the lock (type), for example, rwlock or rwsem. */
1488 	const char *lock_name;
1489 } lock_type_table[] = {
1490 	{ 0,				"semaphore",	"semaphore" },
1491 	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
1492 	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
1493 	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
1494 	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
1495 	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
1496 	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
1497 	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
1498 	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
1499 	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
1500 	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
1501 	{ LCB_F_MUTEX,			"mutex",	"mutex" },
1502 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
1503 	/* alias for optimistic spinning only */
1504 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
1505 };
1506 
1507 static const char *get_type_flags_name(unsigned int flags)
1508 {
1509 	flags &= LCB_F_TYPE_MASK;
1510 
1511 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1512 		if (lock_type_table[i].flags == flags)
1513 			return lock_type_table[i].flags_name;
1514 	}
1515 	return "unknown";
1516 }
1517 
1518 static const char *get_type_lock_name(unsigned int flags)
1519 {
1520 	flags &= LCB_F_TYPE_MASK;
1521 
1522 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1523 		if (lock_type_table[i].flags == flags)
1524 			return lock_type_table[i].lock_name;
1525 	}
1526 	return "unknown";
1527 }
1528 
1529 static void lock_filter_finish(void)
1530 {
1531 	zfree(&filters.types);
1532 	filters.nr_types = 0;
1533 
1534 	zfree(&filters.addrs);
1535 	filters.nr_addrs = 0;
1536 
1537 	for (int i = 0; i < filters.nr_syms; i++)
1538 		free(filters.syms[i]);
1539 
1540 	zfree(&filters.syms);
1541 	filters.nr_syms = 0;
1542 
1543 	zfree(&filters.cgrps);
1544 	filters.nr_cgrps = 0;
1545 
1546 	for (int i = 0; i < filters.nr_slabs; i++)
1547 		free(filters.slabs[i]);
1548 
1549 	zfree(&filters.slabs);
1550 	filters.nr_slabs = 0;
1551 }
1552 
1553 static void sort_contention_result(void)
1554 {
1555 	sort_result();
1556 }
1557 
1558 static void print_header_stdio(void)
1559 {
1560 	struct lock_key *key;
1561 
1562 	list_for_each_entry(key, &lock_keys, list)
1563 		fprintf(lock_output, "%*s ", key->len, key->header);
1564 
1565 	switch (aggr_mode) {
1566 	case LOCK_AGGR_TASK:
1567 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1568 			show_lock_owner ? "owner" : "comm");
1569 		break;
1570 	case LOCK_AGGR_CALLER:
1571 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1572 		break;
1573 	case LOCK_AGGR_ADDR:
1574 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1575 		break;
1576 	case LOCK_AGGR_CGROUP:
1577 		fprintf(lock_output, "  %s\n\n", "cgroup");
1578 		break;
1579 	default:
1580 		break;
1581 	}
1582 }
1583 
1584 static void print_header_csv(const char *sep)
1585 {
1586 	struct lock_key *key;
1587 
1588 	fprintf(lock_output, "# output: ");
1589 	list_for_each_entry(key, &lock_keys, list)
1590 		fprintf(lock_output, "%s%s ", key->header, sep);
1591 
1592 	switch (aggr_mode) {
1593 	case LOCK_AGGR_TASK:
1594 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1595 			show_lock_owner ? "owner" : "comm");
1596 		break;
1597 	case LOCK_AGGR_CALLER:
1598 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1599 		if (verbose > 0)
1600 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1601 		fprintf(lock_output, "\n");
1602 		break;
1603 	case LOCK_AGGR_ADDR:
1604 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1605 		break;
1606 	case LOCK_AGGR_CGROUP:
1607 		fprintf(lock_output, "%s\n", "cgroup");
1608 		break;
1609 	default:
1610 		break;
1611 	}
1612 }
1613 
1614 static void print_header(void)
1615 {
1616 	if (!quiet) {
1617 		if (symbol_conf.field_sep)
1618 			print_header_csv(symbol_conf.field_sep);
1619 		else
1620 			print_header_stdio();
1621 	}
1622 }
1623 
1624 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1625 {
1626 	struct lock_key *key;
1627 	struct thread *t;
1628 	int pid;
1629 
1630 	list_for_each_entry(key, &lock_keys, list) {
1631 		key->print(key, st);
1632 		fprintf(lock_output, " ");
1633 	}
1634 
1635 	switch (aggr_mode) {
1636 	case LOCK_AGGR_CALLER:
1637 		fprintf(lock_output, "  %10s   %s\n", get_type_flags_name(st->flags), st->name);
1638 		break;
1639 	case LOCK_AGGR_TASK:
1640 		pid = st->addr;
1641 		t = perf_session__findnew(session, pid);
1642 		fprintf(lock_output, "  %10d   %s\n",
1643 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1644 		break;
1645 	case LOCK_AGGR_ADDR:
1646 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1647 			st->name, get_type_lock_name(st->flags));
1648 		break;
1649 	case LOCK_AGGR_CGROUP:
1650 		fprintf(lock_output, "  %s\n", st->name);
1651 		break;
1652 	default:
1653 		break;
1654 	}
1655 
1656 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1657 		struct map *kmap;
1658 		struct symbol *sym;
1659 		char buf[128];
1660 		u64 ip;
1661 
1662 		for (int i = 0; i < max_stack_depth; i++) {
1663 			if (!st->callstack || !st->callstack[i])
1664 				break;
1665 
1666 			ip = st->callstack[i];
1667 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1668 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1669 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1670 		}
1671 	}
1672 }
1673 
1674 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1675 				const char *sep)
1676 {
1677 	struct lock_key *key;
1678 	struct thread *t;
1679 	int pid;
1680 
1681 	list_for_each_entry(key, &lock_keys, list) {
1682 		key->print(key, st);
1683 		fprintf(lock_output, "%s ", sep);
1684 	}
1685 
1686 	switch (aggr_mode) {
1687 	case LOCK_AGGR_CALLER:
1688 		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
1689 		if (verbose <= 0)
1690 			fprintf(lock_output, "\n");
1691 		break;
1692 	case LOCK_AGGR_TASK:
1693 		pid = st->addr;
1694 		t = perf_session__findnew(session, pid);
1695 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1696 			pid == -1 ? "Unknown" : thread__comm_str(t));
1697 		break;
1698 	case LOCK_AGGR_ADDR:
1699 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1700 			st->name, sep, get_type_lock_name(st->flags));
1701 		break;
1702 	case LOCK_AGGR_CGROUP:
1703 		fprintf(lock_output, "%s\n",st->name);
1704 		break;
1705 	default:
1706 		break;
1707 	}
1708 
1709 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1710 		struct map *kmap;
1711 		struct symbol *sym;
1712 		char buf[128];
1713 		u64 ip;
1714 
1715 		for (int i = 0; i < max_stack_depth; i++) {
1716 			if (!st->callstack || !st->callstack[i])
1717 				break;
1718 
1719 			ip = st->callstack[i];
1720 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1721 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1722 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1723 		}
1724 		fprintf(lock_output, "\n");
1725 	}
1726 }
1727 
1728 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1729 {
1730 	if (symbol_conf.field_sep)
1731 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1732 	else
1733 		print_lock_stat_stdio(con, st);
1734 }
1735 
1736 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1737 {
1738 	/* Output for debug, this have to be removed */
1739 	int broken = fails->task + fails->stack + fails->time + fails->data;
1740 
1741 	if (!use_bpf)
1742 		print_bad_events(bad, total);
1743 
1744 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1745 		return;
1746 
1747 	total += broken;
1748 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1749 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1750 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1751 
1752 	fprintf(lock_output, "histogram of failure reasons\n");
1753 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1754 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1755 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1756 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1757 }
1758 
1759 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1760 			     const char *sep)
1761 {
1762 	/* Output for debug, this have to be removed */
1763 	if (use_bpf)
1764 		bad = fails->task + fails->stack + fails->time + fails->data;
1765 
1766 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1767 		return;
1768 
1769 	total += bad;
1770 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1771 
1772 	if (use_bpf) {
1773 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1774 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1775 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1776 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1777 	} else {
1778 		int i;
1779 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1780 
1781 		for (i = 0; i < BROKEN_MAX; i++)
1782 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1783 	}
1784 	fprintf(lock_output, "\n");
1785 }
1786 
1787 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1788 {
1789 	if (symbol_conf.field_sep)
1790 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1791 	else
1792 		print_footer_stdio(total, bad, fails);
1793 }
1794 
1795 static void print_contention_result(struct lock_contention *con)
1796 {
1797 	struct lock_stat *st;
1798 	int bad, total, printed;
1799 
1800 	if (!quiet)
1801 		print_header();
1802 
1803 	bad = total = printed = 0;
1804 
1805 	while ((st = pop_from_result())) {
1806 		total += use_bpf ? st->nr_contended : 1;
1807 		if (st->broken)
1808 			bad++;
1809 
1810 		if (!st->wait_time_total)
1811 			continue;
1812 
1813 		print_lock_stat(con, st);
1814 
1815 		if (++printed >= print_nr_entries)
1816 			break;
1817 	}
1818 
1819 	if (con->owner && con->save_callstack && verbose > 0) {
1820 		struct rb_root root = RB_ROOT;
1821 
1822 		if (symbol_conf.field_sep)
1823 			fprintf(lock_output, "# owner stack trace:\n");
1824 		else
1825 			fprintf(lock_output, "\n=== owner stack trace ===\n\n");
1826 		while ((st = pop_owner_stack_trace(con)))
1827 			insert_to(&root, st, compare);
1828 
1829 		while ((st = pop_from(&root))) {
1830 			print_lock_stat(con, st);
1831 			free(st);
1832 		}
1833 	}
1834 
1835 	if (print_nr_entries) {
1836 		/* update the total/bad stats */
1837 		while ((st = pop_from_result())) {
1838 			total += use_bpf ? st->nr_contended : 1;
1839 			if (st->broken)
1840 				bad++;
1841 		}
1842 	}
1843 	/* some entries are collected but hidden by the callstack filter */
1844 	total += con->nr_filtered;
1845 
1846 	print_footer(total, bad, &con->fails);
1847 }
1848 
1849 static bool force;
1850 
1851 static int __cmd_report(bool display_info)
1852 {
1853 	int err = -EINVAL;
1854 	struct perf_tool eops;
1855 	struct perf_data data = {
1856 		.path  = input_name,
1857 		.mode  = PERF_DATA_MODE_READ,
1858 		.force = force,
1859 	};
1860 
1861 	perf_tool__init(&eops, /*ordered_events=*/true);
1862 	eops.attr		 = perf_event__process_attr;
1863 	eops.event_update	 = process_event_update;
1864 	eops.sample		 = process_sample_event;
1865 	eops.comm		 = perf_event__process_comm;
1866 	eops.mmap		 = perf_event__process_mmap;
1867 	eops.mmap2		 = perf_event__process_mmap2;
1868 	eops.namespaces		 = perf_event__process_namespaces;
1869 	eops.tracing_data	 = perf_event__process_tracing_data;
1870 	session = perf_session__new(&data, &eops);
1871 	if (IS_ERR(session)) {
1872 		pr_err("Initializing perf session failed\n");
1873 		return PTR_ERR(session);
1874 	}
1875 
1876 	symbol_conf.allow_aliases = true;
1877 	symbol__init(perf_session__env(session));
1878 
1879 	if (!data.is_pipe) {
1880 		if (!perf_session__has_traces(session, "lock record"))
1881 			goto out_delete;
1882 
1883 		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1884 			pr_err("Initializing perf session tracepoint handlers failed\n");
1885 			goto out_delete;
1886 		}
1887 
1888 		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1889 			pr_err("Initializing perf session tracepoint handlers failed\n");
1890 			goto out_delete;
1891 		}
1892 	}
1893 
1894 	if (setup_output_field(false, output_fields))
1895 		goto out_delete;
1896 
1897 	if (select_key(false))
1898 		goto out_delete;
1899 
1900 	if (show_thread_stats)
1901 		aggr_mode = LOCK_AGGR_TASK;
1902 
1903 	err = perf_session__process_events(session);
1904 	if (err)
1905 		goto out_delete;
1906 
1907 	setup_pager();
1908 	if (display_info) /* used for info subcommand */
1909 		dump_info();
1910 	else {
1911 		combine_result();
1912 		sort_result();
1913 		print_result();
1914 	}
1915 
1916 out_delete:
1917 	perf_session__delete(session);
1918 	return err;
1919 }
1920 
1921 static volatile sig_atomic_t done;
1922 
1923 static void sighandler(int sig __maybe_unused)
1924 {
1925 	done = 1;
1926 }
1927 
1928 static int check_lock_contention_options(const struct option *options,
1929 					 const char * const *usage)
1930 
1931 {
1932 	if (show_thread_stats && show_lock_addrs) {
1933 		pr_err("Cannot use thread and addr mode together\n");
1934 		parse_options_usage(usage, options, "threads", 0);
1935 		parse_options_usage(NULL, options, "lock-addr", 0);
1936 		return -1;
1937 	}
1938 
1939 	if (show_lock_owner && !use_bpf) {
1940 		pr_err("Lock owners are available only with BPF\n");
1941 		parse_options_usage(usage, options, "lock-owner", 0);
1942 		parse_options_usage(NULL, options, "use-bpf", 0);
1943 		return -1;
1944 	}
1945 
1946 	if (show_lock_owner && show_lock_addrs) {
1947 		pr_err("Cannot use owner and addr mode together\n");
1948 		parse_options_usage(usage, options, "lock-owner", 0);
1949 		parse_options_usage(NULL, options, "lock-addr", 0);
1950 		return -1;
1951 	}
1952 
1953 	if (show_lock_cgroups && !use_bpf) {
1954 		pr_err("Cgroups are available only with BPF\n");
1955 		parse_options_usage(usage, options, "lock-cgroup", 0);
1956 		parse_options_usage(NULL, options, "use-bpf", 0);
1957 		return -1;
1958 	}
1959 
1960 	if (show_lock_cgroups && show_lock_addrs) {
1961 		pr_err("Cannot use cgroup and addr mode together\n");
1962 		parse_options_usage(usage, options, "lock-cgroup", 0);
1963 		parse_options_usage(NULL, options, "lock-addr", 0);
1964 		return -1;
1965 	}
1966 
1967 	if (show_lock_cgroups && show_thread_stats) {
1968 		pr_err("Cannot use cgroup and thread mode together\n");
1969 		parse_options_usage(usage, options, "lock-cgroup", 0);
1970 		parse_options_usage(NULL, options, "threads", 0);
1971 		return -1;
1972 	}
1973 
1974 	if (symbol_conf.field_sep) {
1975 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1976 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1977 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1978 			pr_err("Cannot use the separator that is already used\n");
1979 			parse_options_usage(usage, options, "x", 1);
1980 			return -1;
1981 		}
1982 	}
1983 
1984 	if (show_lock_owner && !show_thread_stats) {
1985 		pr_warning("Now -o try to show owner's callstack instead of pid and comm.\n");
1986 		pr_warning("Please use -t option too to keep the old behavior.\n");
1987 	}
1988 
1989 	return 0;
1990 }
1991 
1992 static int __cmd_contention(int argc, const char **argv)
1993 {
1994 	int err = -EINVAL;
1995 	struct perf_tool eops;
1996 	struct perf_data data = {
1997 		.path  = input_name,
1998 		.mode  = PERF_DATA_MODE_READ,
1999 		.force = force,
2000 	};
2001 	struct lock_contention con = {
2002 		.target = &target,
2003 		.map_nr_entries = bpf_map_entries,
2004 		.max_stack = max_stack_depth,
2005 		.stack_skip = stack_skip,
2006 		.filters = &filters,
2007 		.delays = delays,
2008 		.nr_delays = nr_delays,
2009 		.save_callstack = needs_callstack(),
2010 		.owner = show_lock_owner,
2011 		.cgroups = RB_ROOT,
2012 	};
2013 	struct perf_env host_env;
2014 
2015 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2016 	if (!lockhash_table)
2017 		return -ENOMEM;
2018 
2019 	con.result = &lockhash_table[0];
2020 
2021 	perf_tool__init(&eops, /*ordered_events=*/true);
2022 	eops.attr		 = perf_event__process_attr;
2023 	eops.event_update	 = process_event_update;
2024 	eops.sample		 = process_sample_event;
2025 	eops.comm		 = perf_event__process_comm;
2026 	eops.mmap		 = perf_event__process_mmap;
2027 	eops.mmap2		 = perf_event__process_mmap2;
2028 	eops.tracing_data	 = perf_event__process_tracing_data;
2029 
2030 	perf_env__init(&host_env);
2031 	session = __perf_session__new(use_bpf ? NULL : &data, &eops,
2032 				/*trace_event_repipe=*/false, &host_env);
2033 
2034 	if (IS_ERR(session)) {
2035 		pr_err("Initializing perf session failed\n");
2036 		err = PTR_ERR(session);
2037 		session = NULL;
2038 		goto out_delete;
2039 	}
2040 
2041 	con.machine = &session->machines.host;
2042 
2043 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2044 		show_lock_addrs ? LOCK_AGGR_ADDR :
2045 		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2046 
2047 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2048 		con.save_callstack = true;
2049 
2050 	symbol_conf.allow_aliases = true;
2051 	symbol__init(perf_session__env(session));
2052 
2053 	if (use_bpf) {
2054 		err = target__validate(&target);
2055 		if (err) {
2056 			char errbuf[512];
2057 
2058 			target__strerror(&target, err, errbuf, 512);
2059 			pr_err("%s\n", errbuf);
2060 			goto out_delete;
2061 		}
2062 
2063 		done = 0;
2064 		signal(SIGINT, sighandler);
2065 		signal(SIGCHLD, sighandler);
2066 		signal(SIGTERM, sighandler);
2067 
2068 		con.evlist = evlist__new();
2069 		if (con.evlist == NULL) {
2070 			err = -ENOMEM;
2071 			goto out_delete;
2072 		}
2073 
2074 		err = evlist__create_maps(con.evlist, &target);
2075 		if (err < 0)
2076 			goto out_delete;
2077 
2078 		if (argc) {
2079 			err = evlist__prepare_workload(con.evlist, &target,
2080 						       argv, false, NULL);
2081 			if (err < 0)
2082 				goto out_delete;
2083 		}
2084 
2085 		err = lock_contention_prepare(&con);
2086 		if (err < 0) {
2087 			pr_err("lock contention BPF setup failed\n");
2088 			goto out_delete;
2089 		}
2090 	} else if (!data.is_pipe) {
2091 		if (!perf_session__has_traces(session, "lock record"))
2092 			goto out_delete;
2093 
2094 		if (!evlist__find_evsel_by_str(session->evlist,
2095 					       "lock:contention_begin")) {
2096 			pr_err("lock contention evsel not found\n");
2097 			goto out_delete;
2098 		}
2099 
2100 		if (perf_session__set_tracepoints_handlers(session,
2101 						contention_tracepoints)) {
2102 			pr_err("Initializing perf session tracepoint handlers failed\n");
2103 			goto out_delete;
2104 		}
2105 	}
2106 
2107 	err = setup_output_field(true, output_fields);
2108 	if (err) {
2109 		pr_err("Failed to setup output field\n");
2110 		goto out_delete;
2111 	}
2112 
2113 	err = select_key(true);
2114 	if (err)
2115 		goto out_delete;
2116 
2117 	if (symbol_conf.field_sep) {
2118 		int i;
2119 		struct lock_key *keys = contention_keys;
2120 
2121 		/* do not align output in CSV format */
2122 		for (i = 0; keys[i].name; i++)
2123 			keys[i].len = 0;
2124 	}
2125 
2126 	if (use_bpf) {
2127 		lock_contention_start();
2128 		if (argc)
2129 			evlist__start_workload(con.evlist);
2130 
2131 		while (!done) {
2132 			if (argc && waitpid(con.evlist->workload.pid, NULL, WNOHANG) > 0)
2133 				break;
2134 			sleep(1);
2135 		}
2136 
2137 		lock_contention_stop();
2138 		lock_contention_read(&con);
2139 	} else {
2140 		err = perf_session__process_events(session);
2141 		if (err)
2142 			goto out_delete;
2143 	}
2144 
2145 	setup_pager();
2146 
2147 	sort_contention_result();
2148 	print_contention_result(&con);
2149 
2150 out_delete:
2151 	lock_filter_finish();
2152 	evlist__delete(con.evlist);
2153 	lock_contention_finish(&con);
2154 	perf_session__delete(session);
2155 	perf_env__exit(&host_env);
2156 	zfree(&lockhash_table);
2157 	return err;
2158 }
2159 
2160 
2161 static int __cmd_record(int argc, const char **argv)
2162 {
2163 	const char *record_args[] = {
2164 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2165 	};
2166 	const char *callgraph_args[] = {
2167 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2168 	};
2169 	unsigned int rec_argc, i, j, ret;
2170 	unsigned int nr_tracepoints;
2171 	unsigned int nr_callgraph_args = 0;
2172 	const char **rec_argv;
2173 	bool has_lock_stat = true;
2174 
2175 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2176 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2177 			pr_debug("tracepoint %s is not enabled. "
2178 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2179 				 lock_tracepoints[i].name);
2180 			has_lock_stat = false;
2181 			break;
2182 		}
2183 	}
2184 
2185 	if (has_lock_stat)
2186 		goto setup_args;
2187 
2188 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2189 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2190 			pr_err("tracepoint %s is not enabled.\n",
2191 			       contention_tracepoints[i].name);
2192 			return 1;
2193 		}
2194 	}
2195 
2196 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2197 
2198 setup_args:
2199 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2200 
2201 	if (has_lock_stat)
2202 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2203 	else
2204 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2205 
2206 	/* factor of 2 is for -e in front of each tracepoint */
2207 	rec_argc += 2 * nr_tracepoints;
2208 
2209 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2210 	if (!rec_argv)
2211 		return -ENOMEM;
2212 
2213 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2214 		rec_argv[i] = record_args[i];
2215 
2216 	for (j = 0; j < nr_tracepoints; j++) {
2217 		rec_argv[i++] = "-e";
2218 		rec_argv[i++] = has_lock_stat
2219 			? lock_tracepoints[j].name
2220 			: contention_tracepoints[j].name;
2221 	}
2222 
2223 	for (j = 0; j < nr_callgraph_args; j++, i++)
2224 		rec_argv[i] = callgraph_args[j];
2225 
2226 	for (j = 1; j < (unsigned int)argc; j++, i++)
2227 		rec_argv[i] = argv[j];
2228 
2229 	BUG_ON(i != rec_argc);
2230 
2231 	ret = cmd_record(i, rec_argv);
2232 	free(rec_argv);
2233 	return ret;
2234 }
2235 
2236 static int parse_map_entry(const struct option *opt, const char *str,
2237 			    int unset __maybe_unused)
2238 {
2239 	unsigned long *len = (unsigned long *)opt->value;
2240 	unsigned long val;
2241 	char *endptr;
2242 
2243 	errno = 0;
2244 	val = strtoul(str, &endptr, 0);
2245 	if (*endptr != '\0' || errno != 0) {
2246 		pr_err("invalid BPF map length: %s\n", str);
2247 		return -1;
2248 	}
2249 
2250 	*len = val;
2251 	return 0;
2252 }
2253 
2254 static int parse_max_stack(const struct option *opt, const char *str,
2255 			   int unset __maybe_unused)
2256 {
2257 	int *len = opt->value;
2258 	long val;
2259 	char *endptr;
2260 
2261 	errno = 0;
2262 	val = strtol(str, &endptr, 0);
2263 	if (*endptr != '\0' || errno != 0) {
2264 		pr_err("invalid max stack depth: %s\n", str);
2265 		return -1;
2266 	}
2267 
2268 	if (val < 0 || val > sysctl__max_stack()) {
2269 		pr_err("invalid max stack depth: %ld\n", val);
2270 		return -1;
2271 	}
2272 
2273 	*len = val;
2274 	return 0;
2275 }
2276 
2277 static bool add_lock_type(unsigned int flags)
2278 {
2279 	unsigned int *tmp;
2280 
2281 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2282 	if (tmp == NULL)
2283 		return false;
2284 
2285 	tmp[filters.nr_types++] = flags;
2286 	filters.types = tmp;
2287 	return true;
2288 }
2289 
2290 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2291 			   int unset __maybe_unused)
2292 {
2293 	char *s, *tmp, *tok;
2294 
2295 	s = strdup(str);
2296 	if (s == NULL)
2297 		return -1;
2298 
2299 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2300 		bool found = false;
2301 
2302 		/* `tok` is a flags name if it contains ':'. */
2303 		if (strchr(tok, ':')) {
2304 			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2305 				if (!strcmp(lock_type_table[i].flags_name, tok) &&
2306 				    add_lock_type(lock_type_table[i].flags)) {
2307 					found = true;
2308 					break;
2309 				}
2310 			}
2311 
2312 			if (!found) {
2313 				pr_err("Unknown lock flags name: %s\n", tok);
2314 				free(s);
2315 				return -1;
2316 			}
2317 
2318 			continue;
2319 		}
2320 
2321 		/*
2322 		 * Otherwise `tok` is a lock name.
2323 		 * Single lock name could contain multiple flags.
2324 		 * Replace alias `pcpu-sem` with actual name `percpu-rwsem.
2325 		 */
2326 		if (!strcmp(tok, "pcpu-sem"))
2327 			tok = (char *)"percpu-rwsem";
2328 		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2329 			if (!strcmp(lock_type_table[i].lock_name, tok)) {
2330 				if (add_lock_type(lock_type_table[i].flags)) {
2331 					found = true;
2332 				} else {
2333 					free(s);
2334 					return -1;
2335 				}
2336 			}
2337 		}
2338 
2339 		if (!found) {
2340 			pr_err("Unknown lock name: %s\n", tok);
2341 			free(s);
2342 			return -1;
2343 		}
2344 
2345 	}
2346 
2347 	free(s);
2348 	return 0;
2349 }
2350 
2351 static bool add_lock_addr(unsigned long addr)
2352 {
2353 	unsigned long *tmp;
2354 
2355 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2356 	if (tmp == NULL) {
2357 		pr_err("Memory allocation failure\n");
2358 		return false;
2359 	}
2360 
2361 	tmp[filters.nr_addrs++] = addr;
2362 	filters.addrs = tmp;
2363 	return true;
2364 }
2365 
2366 static bool add_lock_sym(char *name)
2367 {
2368 	char **tmp;
2369 	char *sym = strdup(name);
2370 
2371 	if (sym == NULL) {
2372 		pr_err("Memory allocation failure\n");
2373 		return false;
2374 	}
2375 
2376 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2377 	if (tmp == NULL) {
2378 		pr_err("Memory allocation failure\n");
2379 		free(sym);
2380 		return false;
2381 	}
2382 
2383 	tmp[filters.nr_syms++] = sym;
2384 	filters.syms = tmp;
2385 	return true;
2386 }
2387 
2388 static bool add_lock_slab(char *name)
2389 {
2390 	char **tmp;
2391 	char *sym = strdup(name);
2392 
2393 	if (sym == NULL) {
2394 		pr_err("Memory allocation failure\n");
2395 		return false;
2396 	}
2397 
2398 	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
2399 	if (tmp == NULL) {
2400 		pr_err("Memory allocation failure\n");
2401 		return false;
2402 	}
2403 
2404 	tmp[filters.nr_slabs++] = sym;
2405 	filters.slabs = tmp;
2406 	return true;
2407 }
2408 
2409 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2410 			   int unset __maybe_unused)
2411 {
2412 	char *s, *tmp, *tok;
2413 	int ret = 0;
2414 	u64 addr;
2415 
2416 	s = strdup(str);
2417 	if (s == NULL)
2418 		return -1;
2419 
2420 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2421 		char *end;
2422 
2423 		addr = strtoul(tok, &end, 16);
2424 		if (*end == '\0') {
2425 			if (!add_lock_addr(addr)) {
2426 				ret = -1;
2427 				break;
2428 			}
2429 			continue;
2430 		}
2431 
2432 		if (*tok == '&') {
2433 			if (!add_lock_slab(tok + 1)) {
2434 				ret = -1;
2435 				break;
2436 			}
2437 			continue;
2438 		}
2439 
2440 		/*
2441 		 * At this moment, we don't have kernel symbols.  Save the symbols
2442 		 * in a separate list and resolve them to addresses later.
2443 		 */
2444 		if (!add_lock_sym(tok)) {
2445 			ret = -1;
2446 			break;
2447 		}
2448 	}
2449 
2450 	free(s);
2451 	return ret;
2452 }
2453 
2454 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2455 			int unset __maybe_unused)
2456 {
2457 	const char **name = (const char **)opt->value;
2458 
2459 	if (str == NULL)
2460 		return -1;
2461 
2462 	lock_output = fopen(str, "w");
2463 	if (lock_output == NULL) {
2464 		pr_err("Cannot open %s\n", str);
2465 		return -1;
2466 	}
2467 
2468 	*name = str;
2469 	return 0;
2470 }
2471 
2472 static bool add_lock_cgroup(char *name)
2473 {
2474 	u64 *tmp;
2475 	struct cgroup *cgrp;
2476 
2477 	cgrp = cgroup__new(name, /*do_open=*/false);
2478 	if (cgrp == NULL) {
2479 		pr_err("Failed to create cgroup: %s\n", name);
2480 		return false;
2481 	}
2482 
2483 	if (read_cgroup_id(cgrp) < 0) {
2484 		pr_err("Failed to read cgroup id for %s\n", name);
2485 		cgroup__put(cgrp);
2486 		return false;
2487 	}
2488 
2489 	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
2490 	if (tmp == NULL) {
2491 		pr_err("Memory allocation failure\n");
2492 		return false;
2493 	}
2494 
2495 	tmp[filters.nr_cgrps++] = cgrp->id;
2496 	filters.cgrps = tmp;
2497 	cgroup__put(cgrp);
2498 	return true;
2499 }
2500 
2501 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
2502 			       int unset __maybe_unused)
2503 {
2504 	char *s, *tmp, *tok;
2505 	int ret = 0;
2506 
2507 	s = strdup(str);
2508 	if (s == NULL)
2509 		return -1;
2510 
2511 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2512 		if (!add_lock_cgroup(tok)) {
2513 			ret = -1;
2514 			break;
2515 		}
2516 	}
2517 
2518 	free(s);
2519 	return ret;
2520 }
2521 
2522 static bool add_lock_delay(char *spec)
2523 {
2524 	char *at, *pos;
2525 	struct lock_delay *tmp;
2526 	unsigned long duration;
2527 
2528 	at = strchr(spec, '@');
2529 	if (at == NULL) {
2530 		pr_err("lock delay should have '@' sign: %s\n", spec);
2531 		return false;
2532 	}
2533 	if (at == spec) {
2534 		pr_err("lock delay should have time before '@': %s\n", spec);
2535 		return false;
2536 	}
2537 
2538 	*at = '\0';
2539 	duration = strtoul(spec, &pos, 0);
2540 	if (!strcmp(pos, "ns"))
2541 		duration *= 1;
2542 	else if (!strcmp(pos, "us"))
2543 		duration *= 1000;
2544 	else if (!strcmp(pos, "ms"))
2545 		duration *= 1000 * 1000;
2546 	else if (*pos) {
2547 		pr_err("invalid delay time: %s@%s\n", spec, at + 1);
2548 		return false;
2549 	}
2550 
2551 	if (duration > 10 * 1000 * 1000) {
2552 		pr_err("lock delay is too long: %s (> 10ms)\n", spec);
2553 		return false;
2554 	}
2555 
2556 	tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays));
2557 	if (tmp == NULL) {
2558 		pr_err("Memory allocation failure\n");
2559 		return false;
2560 	}
2561 	delays = tmp;
2562 
2563 	delays[nr_delays].sym = strdup(at + 1);
2564 	if (delays[nr_delays].sym == NULL) {
2565 		pr_err("Memory allocation failure\n");
2566 		return false;
2567 	}
2568 	delays[nr_delays].time = duration;
2569 
2570 	nr_delays++;
2571 	return true;
2572 }
2573 
2574 static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str,
2575 			    int unset __maybe_unused)
2576 {
2577 	char *s, *tmp, *tok;
2578 	int ret = 0;
2579 
2580 	s = strdup(str);
2581 	if (s == NULL)
2582 		return -1;
2583 
2584 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2585 		if (!add_lock_delay(tok)) {
2586 			ret = -1;
2587 			break;
2588 		}
2589 	}
2590 
2591 	free(s);
2592 	return ret;
2593 }
2594 
2595 int cmd_lock(int argc, const char **argv)
2596 {
2597 	const struct option lock_options[] = {
2598 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2599 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2600 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2601 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2602 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2603 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2604 		   "file", "vmlinux pathname"),
2605 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2606 		   "file", "kallsyms pathname"),
2607 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2608 	OPT_END()
2609 	};
2610 
2611 	const struct option info_options[] = {
2612 	OPT_BOOLEAN('t', "threads", &info_threads,
2613 		    "dump the thread list in perf.data"),
2614 	OPT_BOOLEAN('m', "map", &info_map,
2615 		    "dump the map of lock instances (address:name table)"),
2616 	OPT_PARENT(lock_options)
2617 	};
2618 
2619 	const struct option report_options[] = {
2620 	OPT_STRING('k', "key", &sort_key, "acquired",
2621 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2622 	OPT_STRING('F', "field", &output_fields, NULL,
2623 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2624 	/* TODO: type */
2625 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2626 		    "combine locks in the same class"),
2627 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2628 		    "show per-thread lock stats"),
2629 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2630 	OPT_PARENT(lock_options)
2631 	};
2632 
2633 	struct option contention_options[] = {
2634 	OPT_STRING('k', "key", &sort_key, "wait_total",
2635 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2636 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2637 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2638 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2639 		    "show per-thread lock stats"),
2640 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2641 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2642 		    "System-wide collection from all CPUs"),
2643 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2644 		    "List of cpus to monitor"),
2645 	OPT_STRING('p', "pid", &target.pid, "pid",
2646 		   "Trace on existing process id"),
2647 	OPT_STRING(0, "tid", &target.tid, "tid",
2648 		   "Trace on existing thread id (exclusive to --pid)"),
2649 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2650 		     "Max number of BPF map entries", parse_map_entry),
2651 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2652 		     "Set the maximum stack depth when collecting lock contention, "
2653 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2654 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2655 		    "Set the number of stack depth to skip when finding a lock caller, "
2656 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2657 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2658 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2659 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2660 		     "Filter specific type of locks", parse_lock_type),
2661 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2662 		     "Filter specific address/symbol of locks", parse_lock_addr),
2663 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2664 		     "Filter specific function in the callstack", parse_call_stack),
2665 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2666 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2667 		   "print result in CSV format with custom separator"),
2668 	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
2669 	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
2670 		     "Filter specific cgroups", parse_cgroup_filter),
2671 	OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC",
2672 		     "Inject delays to specific locks", parse_lock_delay),
2673 	OPT_PARENT(lock_options)
2674 	};
2675 
2676 	const char * const info_usage[] = {
2677 		"perf lock info [<options>]",
2678 		NULL
2679 	};
2680 	const char *const lock_subcommands[] = { "record", "report", "script",
2681 						 "info", "contention", NULL };
2682 	const char *lock_usage[] = {
2683 		NULL,
2684 		NULL
2685 	};
2686 	const char * const report_usage[] = {
2687 		"perf lock report [<options>]",
2688 		NULL
2689 	};
2690 	const char * const contention_usage[] = {
2691 		"perf lock contention [<options>]",
2692 		NULL
2693 	};
2694 	unsigned int i;
2695 	int rc = 0;
2696 
2697 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2698 	if (!lockhash_table)
2699 		return -ENOMEM;
2700 
2701 	for (i = 0; i < LOCKHASH_SIZE; i++)
2702 		INIT_HLIST_HEAD(lockhash_table + i);
2703 
2704 	lock_output = stderr;
2705 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2706 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2707 	if (!argc)
2708 		usage_with_options(lock_usage, lock_options);
2709 
2710 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2711 		return __cmd_record(argc, argv);
2712 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2713 		trace_handler = &report_lock_ops;
2714 		if (argc) {
2715 			argc = parse_options(argc, argv,
2716 					     report_options, report_usage, 0);
2717 			if (argc)
2718 				usage_with_options(report_usage, report_options);
2719 		}
2720 		rc = __cmd_report(false);
2721 	} else if (!strcmp(argv[0], "script")) {
2722 		/* Aliased to 'perf script' */
2723 		rc = cmd_script(argc, argv);
2724 	} else if (!strcmp(argv[0], "info")) {
2725 		if (argc) {
2726 			argc = parse_options(argc, argv,
2727 					     info_options, info_usage, 0);
2728 			if (argc)
2729 				usage_with_options(info_usage, info_options);
2730 		}
2731 
2732 		/* If neither threads nor map requested, display both */
2733 		if (!info_threads && !info_map) {
2734 			info_threads = true;
2735 			info_map = true;
2736 		}
2737 
2738 		/* recycling report_lock_ops */
2739 		trace_handler = &report_lock_ops;
2740 		rc = __cmd_report(true);
2741 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2742 		trace_handler = &contention_lock_ops;
2743 		sort_key = "wait_total";
2744 		output_fields = "contended,wait_total,wait_max,avg_wait";
2745 
2746 #ifndef HAVE_BPF_SKEL
2747 		set_option_nobuild(contention_options, 'b', "use-bpf",
2748 				   "no BUILD_BPF_SKEL=1", false);
2749 #endif
2750 		if (argc) {
2751 			argc = parse_options(argc, argv, contention_options,
2752 					     contention_usage, 0);
2753 		}
2754 
2755 		if (check_lock_contention_options(contention_options,
2756 						  contention_usage) < 0)
2757 			return -1;
2758 
2759 		rc = __cmd_contention(argc, argv);
2760 	} else {
2761 		usage_with_options(lock_usage, lock_options);
2762 	}
2763 
2764 	/* free usage string allocated by parse_options_subcommand */
2765 	free((void *)lock_usage[0]);
2766 
2767 	zfree(&lockhash_table);
2768 	return rc;
2769 }
2770