xref: /linux/tools/perf/builtin-lock.c (revision 390d5ea26622f794c2d29cefd5a01ef116b4fe1d)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/cgroup.h"
14 #include "util/callchain.h"
15 #include "util/lock-contention.h"
16 #include "util/bpf_skel/lock_data.h"
17 
18 #include <subcmd/pager.h>
19 #include <subcmd/parse-options.h>
20 #include "util/trace-event.h"
21 #include "util/tracepoint.h"
22 
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/data.h"
27 #include "util/string2.h"
28 #include "util/map.h"
29 #include "util/util.h"
30 
31 #include <stdio.h>
32 #include <sys/types.h>
33 #include <sys/prctl.h>
34 #include <sys/wait.h>
35 #include <semaphore.h>
36 #include <math.h>
37 #include <limits.h>
38 #include <ctype.h>
39 
40 #include <linux/list.h>
41 #include <linux/hash.h>
42 #include <linux/kernel.h>
43 #include <linux/zalloc.h>
44 #include <linux/err.h>
45 #include <linux/stringify.h>
46 
47 static struct perf_session *session;
48 static struct target target;
49 
50 static struct rb_root		thread_stats;
51 
52 static bool combine_locks;
53 static bool show_thread_stats;
54 static bool show_lock_addrs;
55 static bool show_lock_owner;
56 static bool show_lock_cgroups;
57 static bool use_bpf;
58 static unsigned long bpf_map_entries = MAX_ENTRIES;
59 static int max_stack_depth = CONTENTION_STACK_DEPTH;
60 static int stack_skip = CONTENTION_STACK_SKIP;
61 static int print_nr_entries = INT_MAX / 2;
62 static const char *output_name = NULL;
63 static FILE *lock_output;
64 
65 static struct lock_filter filters;
66 static struct lock_delay *delays;
67 static int nr_delays;
68 
69 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
70 
71 static struct thread_stat *thread_stat_find(u32 tid)
72 {
73 	struct rb_node *node;
74 	struct thread_stat *st;
75 
76 	node = thread_stats.rb_node;
77 	while (node) {
78 		st = container_of(node, struct thread_stat, rb);
79 		if (st->tid == tid)
80 			return st;
81 		else if (tid < st->tid)
82 			node = node->rb_left;
83 		else
84 			node = node->rb_right;
85 	}
86 
87 	return NULL;
88 }
89 
90 static void thread_stat_insert(struct thread_stat *new)
91 {
92 	struct rb_node **rb = &thread_stats.rb_node;
93 	struct rb_node *parent = NULL;
94 	struct thread_stat *p;
95 
96 	while (*rb) {
97 		p = container_of(*rb, struct thread_stat, rb);
98 		parent = *rb;
99 
100 		if (new->tid < p->tid)
101 			rb = &(*rb)->rb_left;
102 		else if (new->tid > p->tid)
103 			rb = &(*rb)->rb_right;
104 		else
105 			BUG_ON("inserting invalid thread_stat\n");
106 	}
107 
108 	rb_link_node(&new->rb, parent, rb);
109 	rb_insert_color(&new->rb, &thread_stats);
110 }
111 
112 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
113 {
114 	struct thread_stat *st;
115 
116 	st = thread_stat_find(tid);
117 	if (st)
118 		return st;
119 
120 	st = zalloc(sizeof(struct thread_stat));
121 	if (!st) {
122 		pr_err("memory allocation failed\n");
123 		return NULL;
124 	}
125 
126 	st->tid = tid;
127 	INIT_LIST_HEAD(&st->seq_list);
128 
129 	thread_stat_insert(st);
130 
131 	return st;
132 }
133 
134 static struct thread_stat *thread_stat_findnew_first(u32 tid);
135 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
136 	thread_stat_findnew_first;
137 
138 static struct thread_stat *thread_stat_findnew_first(u32 tid)
139 {
140 	struct thread_stat *st;
141 
142 	st = zalloc(sizeof(struct thread_stat));
143 	if (!st) {
144 		pr_err("memory allocation failed\n");
145 		return NULL;
146 	}
147 	st->tid = tid;
148 	INIT_LIST_HEAD(&st->seq_list);
149 
150 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
151 	rb_insert_color(&st->rb, &thread_stats);
152 
153 	thread_stat_findnew = thread_stat_findnew_after_first;
154 	return st;
155 }
156 
157 /* build simple key function one is bigger than two */
158 #define SINGLE_KEY(member)						\
159 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
160 					 struct lock_stat *two)		\
161 	{								\
162 		return one->member > two->member;			\
163 	}
164 
165 SINGLE_KEY(nr_acquired)
166 SINGLE_KEY(nr_contended)
167 SINGLE_KEY(avg_wait_time)
168 SINGLE_KEY(wait_time_total)
169 SINGLE_KEY(wait_time_max)
170 
171 static int lock_stat_key_wait_time_min(struct lock_stat *one,
172 					struct lock_stat *two)
173 {
174 	u64 s1 = one->wait_time_min;
175 	u64 s2 = two->wait_time_min;
176 	if (s1 == ULLONG_MAX)
177 		s1 = 0;
178 	if (s2 == ULLONG_MAX)
179 		s2 = 0;
180 	return s1 > s2;
181 }
182 
183 struct lock_key {
184 	/*
185 	 * name: the value for specify by user
186 	 * this should be simpler than raw name of member
187 	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
188 	 */
189 	const char		*name;
190 	/* header: the string printed on the header line */
191 	const char		*header;
192 	/* len: the printing width of the field */
193 	int			len;
194 	/* key: a pointer to function to compare two lock stats for sorting */
195 	int			(*key)(struct lock_stat*, struct lock_stat*);
196 	/* print: a pointer to function to print a given lock stats */
197 	void			(*print)(struct lock_key*, struct lock_stat*);
198 	/* list: list entry to link this */
199 	struct list_head	list;
200 };
201 
202 static void lock_stat_key_print_time(unsigned long long nsec, int len)
203 {
204 	static const struct {
205 		float base;
206 		const char *unit;
207 	} table[] = {
208 		{ 1e9 * 3600, "h " },
209 		{ 1e9 * 60, "m " },
210 		{ 1e9, "s " },
211 		{ 1e6, "ms" },
212 		{ 1e3, "us" },
213 		{ 0, NULL },
214 	};
215 
216 	/* for CSV output */
217 	if (len == 0) {
218 		fprintf(lock_output, "%llu", nsec);
219 		return;
220 	}
221 
222 	for (int i = 0; table[i].unit; i++) {
223 		if (nsec < table[i].base)
224 			continue;
225 
226 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
227 		return;
228 	}
229 
230 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
231 }
232 
233 #define PRINT_KEY(member)						\
234 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
235 					   struct lock_stat *ls)	\
236 {									\
237 	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
238 }
239 
240 #define PRINT_TIME(member)						\
241 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
242 					   struct lock_stat *ls)	\
243 {									\
244 	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
245 }
246 
247 PRINT_KEY(nr_acquired)
248 PRINT_KEY(nr_contended)
249 PRINT_TIME(avg_wait_time)
250 PRINT_TIME(wait_time_total)
251 PRINT_TIME(wait_time_max)
252 
253 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
254 					      struct lock_stat *ls)
255 {
256 	u64 wait_time = ls->wait_time_min;
257 
258 	if (wait_time == ULLONG_MAX)
259 		wait_time = 0;
260 
261 	lock_stat_key_print_time(wait_time, key->len);
262 }
263 
264 
265 static const char		*sort_key = "acquired";
266 
267 static int			(*compare)(struct lock_stat *, struct lock_stat *);
268 
269 static struct rb_root		sorted; /* place to store intermediate data */
270 static struct rb_root		result;	/* place to store sorted data */
271 
272 static LIST_HEAD(lock_keys);
273 static const char		*output_fields;
274 
275 #define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
276 	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
277 static struct lock_key report_keys[] = {
278 	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
279 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
280 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
281 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
282 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
283 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
284 
285 	/* extra comparisons much complicated should be here */
286 	{ }
287 };
288 
289 static struct lock_key contention_keys[] = {
290 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
291 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
292 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
293 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
294 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
295 
296 	/* extra comparisons much complicated should be here */
297 	{ }
298 };
299 
300 static int select_key(bool contention)
301 {
302 	int i;
303 	struct lock_key *keys = report_keys;
304 
305 	if (contention)
306 		keys = contention_keys;
307 
308 	for (i = 0; keys[i].name; i++) {
309 		if (!strcmp(keys[i].name, sort_key)) {
310 			compare = keys[i].key;
311 
312 			/* selected key should be in the output fields */
313 			if (list_empty(&keys[i].list))
314 				list_add_tail(&keys[i].list, &lock_keys);
315 
316 			return 0;
317 		}
318 	}
319 
320 	pr_err("Unknown compare key: %s\n", sort_key);
321 	return -1;
322 }
323 
324 static int add_output_field(bool contention, char *name)
325 {
326 	int i;
327 	struct lock_key *keys = report_keys;
328 
329 	if (contention)
330 		keys = contention_keys;
331 
332 	for (i = 0; keys[i].name; i++) {
333 		if (strcmp(keys[i].name, name))
334 			continue;
335 
336 		/* prevent double link */
337 		if (list_empty(&keys[i].list))
338 			list_add_tail(&keys[i].list, &lock_keys);
339 
340 		return 0;
341 	}
342 
343 	pr_err("Unknown output field: %s\n", name);
344 	return -1;
345 }
346 
347 static int setup_output_field(bool contention, const char *str)
348 {
349 	char *tok, *tmp, *orig;
350 	int i, ret = 0;
351 	struct lock_key *keys = report_keys;
352 
353 	if (contention)
354 		keys = contention_keys;
355 
356 	/* no output field given: use all of them */
357 	if (str == NULL) {
358 		for (i = 0; keys[i].name; i++)
359 			list_add_tail(&keys[i].list, &lock_keys);
360 		return 0;
361 	}
362 
363 	for (i = 0; keys[i].name; i++)
364 		INIT_LIST_HEAD(&keys[i].list);
365 
366 	orig = tmp = strdup(str);
367 	if (orig == NULL)
368 		return -ENOMEM;
369 
370 	while ((tok = strsep(&tmp, ",")) != NULL){
371 		ret = add_output_field(contention, tok);
372 		if (ret < 0)
373 			break;
374 	}
375 	free(orig);
376 
377 	return ret;
378 }
379 
380 static void combine_lock_stats(struct lock_stat *st)
381 {
382 	struct rb_node **rb = &sorted.rb_node;
383 	struct rb_node *parent = NULL;
384 	struct lock_stat *p;
385 	int ret;
386 
387 	while (*rb) {
388 		p = container_of(*rb, struct lock_stat, rb);
389 		parent = *rb;
390 
391 		if (st->name && p->name)
392 			ret = strcmp(st->name, p->name);
393 		else
394 			ret = !!st->name - !!p->name;
395 
396 		if (ret == 0) {
397 			p->nr_acquired += st->nr_acquired;
398 			p->nr_contended += st->nr_contended;
399 			p->wait_time_total += st->wait_time_total;
400 
401 			if (p->nr_contended)
402 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
403 
404 			if (p->wait_time_min > st->wait_time_min)
405 				p->wait_time_min = st->wait_time_min;
406 			if (p->wait_time_max < st->wait_time_max)
407 				p->wait_time_max = st->wait_time_max;
408 
409 			p->broken |= st->broken;
410 			st->combined = 1;
411 			return;
412 		}
413 
414 		if (ret < 0)
415 			rb = &(*rb)->rb_left;
416 		else
417 			rb = &(*rb)->rb_right;
418 	}
419 
420 	rb_link_node(&st->rb, parent, rb);
421 	rb_insert_color(&st->rb, &sorted);
422 }
423 
424 static void insert_to(struct rb_root *rr, struct lock_stat *st,
425 		      int (*bigger)(struct lock_stat *, struct lock_stat *))
426 {
427 	struct rb_node **rb = &rr->rb_node;
428 	struct rb_node *parent = NULL;
429 	struct lock_stat *p;
430 
431 	while (*rb) {
432 		p = container_of(*rb, struct lock_stat, rb);
433 		parent = *rb;
434 
435 		if (bigger(st, p))
436 			rb = &(*rb)->rb_left;
437 		else
438 			rb = &(*rb)->rb_right;
439 	}
440 
441 	rb_link_node(&st->rb, parent, rb);
442 	rb_insert_color(&st->rb, rr);
443 }
444 
445 static inline void insert_to_result(struct lock_stat *st,
446 				    int (*bigger)(struct lock_stat *,
447 						  struct lock_stat *))
448 {
449 	if (combine_locks && st->combined)
450 		return;
451 	insert_to(&result, st, bigger);
452 }
453 
454 static inline struct lock_stat *pop_from(struct rb_root *rr)
455 {
456 	struct rb_node *node = rr->rb_node;
457 
458 	if (!node)
459 		return NULL;
460 
461 	while (node->rb_left)
462 		node = node->rb_left;
463 
464 	rb_erase(node, rr);
465 	return container_of(node, struct lock_stat, rb);
466 
467 }
468 
469 /* returns left most element of result, and erase it */
470 static struct lock_stat *pop_from_result(void)
471 {
472 	return pop_from(&result);
473 }
474 
475 struct trace_lock_handler {
476 	/* it's used on CONFIG_LOCKDEP */
477 	int (*acquire_event)(struct perf_sample *sample);
478 
479 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
480 	int (*acquired_event)(struct perf_sample *sample);
481 
482 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
483 	int (*contended_event)(struct perf_sample *sample);
484 
485 	/* it's used on CONFIG_LOCKDEP */
486 	int (*release_event)(struct perf_sample *sample);
487 
488 	/* it's used when CONFIG_LOCKDEP is off */
489 	int (*contention_begin_event)(struct perf_sample *sample);
490 
491 	/* it's used when CONFIG_LOCKDEP is off */
492 	int (*contention_end_event)(struct perf_sample *sample);
493 };
494 
495 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
496 {
497 	struct lock_seq_stat *seq;
498 
499 	list_for_each_entry(seq, &ts->seq_list, list) {
500 		if (seq->addr == addr)
501 			return seq;
502 	}
503 
504 	seq = zalloc(sizeof(struct lock_seq_stat));
505 	if (!seq) {
506 		pr_err("memory allocation failed\n");
507 		return NULL;
508 	}
509 	seq->state = SEQ_STATE_UNINITIALIZED;
510 	seq->addr = addr;
511 
512 	list_add(&seq->list, &ts->seq_list);
513 	return seq;
514 }
515 
516 enum broken_state {
517 	BROKEN_ACQUIRE,
518 	BROKEN_ACQUIRED,
519 	BROKEN_CONTENDED,
520 	BROKEN_RELEASE,
521 	BROKEN_MAX,
522 };
523 
524 static int bad_hist[BROKEN_MAX];
525 
526 enum acquire_flags {
527 	TRY_LOCK = 1,
528 	READ_LOCK = 2,
529 };
530 
531 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
532 {
533 	switch (aggr_mode) {
534 	case LOCK_AGGR_ADDR:
535 		*key = addr;
536 		break;
537 	case LOCK_AGGR_TASK:
538 		*key = tid;
539 		break;
540 	case LOCK_AGGR_CALLER:
541 	case LOCK_AGGR_CGROUP:
542 	default:
543 		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
544 		return -EINVAL;
545 	}
546 	return 0;
547 }
548 
549 static u64 callchain_id(struct perf_sample *sample);
550 
551 static int get_key_by_aggr_mode(u64 *key, u64 addr,
552 				 struct perf_sample *sample)
553 {
554 	if (aggr_mode == LOCK_AGGR_CALLER) {
555 		*key = callchain_id(sample);
556 		return 0;
557 	}
558 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
559 }
560 
561 static int report_lock_acquire_event(struct perf_sample *sample)
562 {
563 	struct lock_stat *ls;
564 	struct thread_stat *ts;
565 	struct lock_seq_stat *seq;
566 	const char *name = perf_sample__strval(sample, "name");
567 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
568 	int flag = perf_sample__intval(sample, "flags");
569 	u64 key;
570 	int ret;
571 
572 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
573 	if (ret < 0)
574 		return ret;
575 
576 	ls = lock_stat_findnew(key, name, 0);
577 	if (!ls)
578 		return -ENOMEM;
579 
580 	ts = thread_stat_findnew(sample->tid);
581 	if (!ts)
582 		return -ENOMEM;
583 
584 	seq = get_seq(ts, addr);
585 	if (!seq)
586 		return -ENOMEM;
587 
588 	switch (seq->state) {
589 	case SEQ_STATE_UNINITIALIZED:
590 	case SEQ_STATE_RELEASED:
591 		if (!flag) {
592 			seq->state = SEQ_STATE_ACQUIRING;
593 		} else {
594 			if (flag & TRY_LOCK)
595 				ls->nr_trylock++;
596 			if (flag & READ_LOCK)
597 				ls->nr_readlock++;
598 			seq->state = SEQ_STATE_READ_ACQUIRED;
599 			seq->read_count = 1;
600 			ls->nr_acquired++;
601 		}
602 		break;
603 	case SEQ_STATE_READ_ACQUIRED:
604 		if (flag & READ_LOCK) {
605 			seq->read_count++;
606 			ls->nr_acquired++;
607 			goto end;
608 		} else {
609 			goto broken;
610 		}
611 		break;
612 	case SEQ_STATE_ACQUIRED:
613 	case SEQ_STATE_ACQUIRING:
614 	case SEQ_STATE_CONTENDED:
615 broken:
616 		/* broken lock sequence */
617 		if (!ls->broken) {
618 			ls->broken = 1;
619 			bad_hist[BROKEN_ACQUIRE]++;
620 		}
621 		list_del_init(&seq->list);
622 		free(seq);
623 		goto end;
624 	default:
625 		BUG_ON("Unknown state of lock sequence found!\n");
626 		break;
627 	}
628 
629 	ls->nr_acquire++;
630 	seq->prev_event_time = sample->time;
631 end:
632 	return 0;
633 }
634 
635 static int report_lock_acquired_event(struct perf_sample *sample)
636 {
637 	struct lock_stat *ls;
638 	struct thread_stat *ts;
639 	struct lock_seq_stat *seq;
640 	u64 contended_term;
641 	const char *name = perf_sample__strval(sample, "name");
642 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
643 	u64 key;
644 	int ret;
645 
646 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
647 	if (ret < 0)
648 		return ret;
649 
650 	ls = lock_stat_findnew(key, name, 0);
651 	if (!ls)
652 		return -ENOMEM;
653 
654 	ts = thread_stat_findnew(sample->tid);
655 	if (!ts)
656 		return -ENOMEM;
657 
658 	seq = get_seq(ts, addr);
659 	if (!seq)
660 		return -ENOMEM;
661 
662 	switch (seq->state) {
663 	case SEQ_STATE_UNINITIALIZED:
664 		/* orphan event, do nothing */
665 		return 0;
666 	case SEQ_STATE_ACQUIRING:
667 		break;
668 	case SEQ_STATE_CONTENDED:
669 		contended_term = sample->time - seq->prev_event_time;
670 		ls->wait_time_total += contended_term;
671 		if (contended_term < ls->wait_time_min)
672 			ls->wait_time_min = contended_term;
673 		if (ls->wait_time_max < contended_term)
674 			ls->wait_time_max = contended_term;
675 		break;
676 	case SEQ_STATE_RELEASED:
677 	case SEQ_STATE_ACQUIRED:
678 	case SEQ_STATE_READ_ACQUIRED:
679 		/* broken lock sequence */
680 		if (!ls->broken) {
681 			ls->broken = 1;
682 			bad_hist[BROKEN_ACQUIRED]++;
683 		}
684 		list_del_init(&seq->list);
685 		free(seq);
686 		goto end;
687 	default:
688 		BUG_ON("Unknown state of lock sequence found!\n");
689 		break;
690 	}
691 
692 	seq->state = SEQ_STATE_ACQUIRED;
693 	ls->nr_acquired++;
694 	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
695 	seq->prev_event_time = sample->time;
696 end:
697 	return 0;
698 }
699 
700 static int report_lock_contended_event(struct perf_sample *sample)
701 {
702 	struct lock_stat *ls;
703 	struct thread_stat *ts;
704 	struct lock_seq_stat *seq;
705 	const char *name = perf_sample__strval(sample, "name");
706 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
707 	u64 key;
708 	int ret;
709 
710 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
711 	if (ret < 0)
712 		return ret;
713 
714 	ls = lock_stat_findnew(key, name, 0);
715 	if (!ls)
716 		return -ENOMEM;
717 
718 	ts = thread_stat_findnew(sample->tid);
719 	if (!ts)
720 		return -ENOMEM;
721 
722 	seq = get_seq(ts, addr);
723 	if (!seq)
724 		return -ENOMEM;
725 
726 	switch (seq->state) {
727 	case SEQ_STATE_UNINITIALIZED:
728 		/* orphan event, do nothing */
729 		return 0;
730 	case SEQ_STATE_ACQUIRING:
731 		break;
732 	case SEQ_STATE_RELEASED:
733 	case SEQ_STATE_ACQUIRED:
734 	case SEQ_STATE_READ_ACQUIRED:
735 	case SEQ_STATE_CONTENDED:
736 		/* broken lock sequence */
737 		if (!ls->broken) {
738 			ls->broken = 1;
739 			bad_hist[BROKEN_CONTENDED]++;
740 		}
741 		list_del_init(&seq->list);
742 		free(seq);
743 		goto end;
744 	default:
745 		BUG_ON("Unknown state of lock sequence found!\n");
746 		break;
747 	}
748 
749 	seq->state = SEQ_STATE_CONTENDED;
750 	ls->nr_contended++;
751 	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
752 	seq->prev_event_time = sample->time;
753 end:
754 	return 0;
755 }
756 
757 static int report_lock_release_event(struct perf_sample *sample)
758 {
759 	struct lock_stat *ls;
760 	struct thread_stat *ts;
761 	struct lock_seq_stat *seq;
762 	const char *name = perf_sample__strval(sample, "name");
763 	u64 addr = perf_sample__intval(sample, "lockdep_addr");
764 	u64 key;
765 	int ret;
766 
767 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
768 	if (ret < 0)
769 		return ret;
770 
771 	ls = lock_stat_findnew(key, name, 0);
772 	if (!ls)
773 		return -ENOMEM;
774 
775 	ts = thread_stat_findnew(sample->tid);
776 	if (!ts)
777 		return -ENOMEM;
778 
779 	seq = get_seq(ts, addr);
780 	if (!seq)
781 		return -ENOMEM;
782 
783 	switch (seq->state) {
784 	case SEQ_STATE_UNINITIALIZED:
785 		goto end;
786 	case SEQ_STATE_ACQUIRED:
787 		break;
788 	case SEQ_STATE_READ_ACQUIRED:
789 		seq->read_count--;
790 		BUG_ON(seq->read_count < 0);
791 		if (seq->read_count) {
792 			ls->nr_release++;
793 			goto end;
794 		}
795 		break;
796 	case SEQ_STATE_ACQUIRING:
797 	case SEQ_STATE_CONTENDED:
798 	case SEQ_STATE_RELEASED:
799 		/* broken lock sequence */
800 		if (!ls->broken) {
801 			ls->broken = 1;
802 			bad_hist[BROKEN_RELEASE]++;
803 		}
804 		goto free_seq;
805 	default:
806 		BUG_ON("Unknown state of lock sequence found!\n");
807 		break;
808 	}
809 
810 	ls->nr_release++;
811 free_seq:
812 	list_del_init(&seq->list);
813 	free(seq);
814 end:
815 	return 0;
816 }
817 
818 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
819 				  char *buf, int size)
820 {
821 	u64 offset;
822 
823 	if (map == NULL || sym == NULL) {
824 		buf[0] = '\0';
825 		return 0;
826 	}
827 
828 	offset = map__map_ip(map, ip) - sym->start;
829 
830 	if (offset)
831 		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
832 	else
833 		return strlcpy(buf, sym->name, size);
834 }
835 static int lock_contention_caller(struct perf_sample *sample,
836 				  char *buf, int size)
837 {
838 	struct thread *thread;
839 	struct callchain_cursor *cursor;
840 	struct machine *machine = &session->machines.host;
841 	struct symbol *sym;
842 	int skip = 0;
843 	int ret;
844 
845 	/* lock names will be replaced to task name later */
846 	if (show_thread_stats)
847 		return -1;
848 
849 	thread = machine__findnew_thread(machine, -1, sample->pid);
850 	if (thread == NULL)
851 		return -1;
852 
853 	cursor = get_tls_callchain_cursor();
854 
855 	/* use caller function name from the callchain */
856 	ret = thread__resolve_callchain(thread, cursor, sample,
857 					NULL, NULL, max_stack_depth);
858 	if (ret != 0) {
859 		thread__put(thread);
860 		return -1;
861 	}
862 
863 	callchain_cursor_commit(cursor);
864 	thread__put(thread);
865 
866 	while (true) {
867 		struct callchain_cursor_node *node;
868 
869 		node = callchain_cursor_current(cursor);
870 		if (node == NULL)
871 			break;
872 
873 		/* skip first few entries - for lock functions */
874 		if (++skip <= stack_skip)
875 			goto next;
876 
877 		sym = node->ms.sym;
878 		if (sym && !machine__is_lock_function(machine, node->ip)) {
879 			get_symbol_name_offset(node->ms.map, sym, node->ip,
880 					       buf, size);
881 			return 0;
882 		}
883 
884 next:
885 		callchain_cursor_advance(cursor);
886 	}
887 	return -1;
888 }
889 
890 static u64 callchain_id(struct perf_sample *sample)
891 {
892 	struct callchain_cursor *cursor;
893 	struct machine *machine = &session->machines.host;
894 	struct thread *thread;
895 	u64 hash = 0;
896 	int skip = 0;
897 	int ret;
898 
899 	thread = machine__findnew_thread(machine, -1, sample->pid);
900 	if (thread == NULL)
901 		return -1;
902 
903 	cursor = get_tls_callchain_cursor();
904 	/* use caller function name from the callchain */
905 	ret = thread__resolve_callchain(thread, cursor, sample,
906 					NULL, NULL, max_stack_depth);
907 	thread__put(thread);
908 
909 	if (ret != 0)
910 		return -1;
911 
912 	callchain_cursor_commit(cursor);
913 
914 	while (true) {
915 		struct callchain_cursor_node *node;
916 
917 		node = callchain_cursor_current(cursor);
918 		if (node == NULL)
919 			break;
920 
921 		/* skip first few entries - for lock functions */
922 		if (++skip <= stack_skip)
923 			goto next;
924 
925 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
926 			goto next;
927 
928 		hash ^= hash_long((unsigned long)node->ip, 64);
929 
930 next:
931 		callchain_cursor_advance(cursor);
932 	}
933 	return hash;
934 }
935 
936 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
937 {
938 	u64 *callstack;
939 	u64 i;
940 	int c;
941 
942 	if (!sample->callchain) {
943 		pr_debug("Sample unexpectedly missing callchain\n");
944 		return NULL;
945 	}
946 
947 	callstack = calloc(max_stack, sizeof(*callstack));
948 	if (callstack == NULL) {
949 		pr_debug("Failed to allocate callstack\n");
950 		return NULL;
951 	}
952 
953 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
954 		u64 ip = sample->callchain->ips[i];
955 
956 		if (ip >= PERF_CONTEXT_MAX)
957 			continue;
958 
959 		callstack[c++] = ip;
960 	}
961 	return callstack;
962 }
963 
964 static int report_lock_contention_begin_event(struct perf_sample *sample)
965 {
966 	struct lock_stat *ls;
967 	struct thread_stat *ts;
968 	struct lock_seq_stat *seq;
969 	u64 addr = perf_sample__intval(sample, "lock_addr");
970 	unsigned int flags = perf_sample__intval(sample, "flags");
971 	u64 key;
972 	int i, ret;
973 	static bool kmap_loaded;
974 	struct machine *machine = &session->machines.host;
975 	struct map *kmap;
976 	struct symbol *sym;
977 
978 	ret = get_key_by_aggr_mode(&key, addr, sample);
979 	if (ret < 0)
980 		return ret;
981 
982 	if (!kmap_loaded) {
983 		unsigned long *addrs;
984 
985 		/* make sure it loads the kernel map to find lock symbols */
986 		map__load(machine__kernel_map(machine));
987 		kmap_loaded = true;
988 
989 		/* convert (kernel) symbols to addresses */
990 		for (i = 0; i < filters.nr_syms; i++) {
991 			sym = machine__find_kernel_symbol_by_name(machine,
992 								  filters.syms[i],
993 								  &kmap);
994 			if (sym == NULL) {
995 				pr_warning("ignore unknown symbol: %s\n",
996 					   filters.syms[i]);
997 				continue;
998 			}
999 
1000 			addrs = realloc(filters.addrs,
1001 					(filters.nr_addrs + 1) * sizeof(*addrs));
1002 			if (addrs == NULL) {
1003 				pr_warning("memory allocation failure\n");
1004 				return -ENOMEM;
1005 			}
1006 
1007 			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
1008 			filters.addrs = addrs;
1009 		}
1010 	}
1011 
1012 	ls = lock_stat_find(key);
1013 	if (!ls) {
1014 		char buf[128];
1015 		const char *name = "";
1016 
1017 		switch (aggr_mode) {
1018 		case LOCK_AGGR_ADDR:
1019 			sym = machine__find_kernel_symbol(machine, key, &kmap);
1020 			if (sym)
1021 				name = sym->name;
1022 			break;
1023 		case LOCK_AGGR_CALLER:
1024 			name = buf;
1025 			if (lock_contention_caller(sample, buf, sizeof(buf)) < 0)
1026 				name = "Unknown";
1027 			break;
1028 		case LOCK_AGGR_CGROUP:
1029 		case LOCK_AGGR_TASK:
1030 		default:
1031 			break;
1032 		}
1033 
1034 		ls = lock_stat_findnew(key, name, flags);
1035 		if (!ls)
1036 			return -ENOMEM;
1037 	}
1038 
1039 	if (filters.nr_types) {
1040 		bool found = false;
1041 
1042 		for (i = 0; i < filters.nr_types; i++) {
1043 			if (flags == filters.types[i]) {
1044 				found = true;
1045 				break;
1046 			}
1047 		}
1048 
1049 		if (!found)
1050 			return 0;
1051 	}
1052 
1053 	if (filters.nr_addrs) {
1054 		bool found = false;
1055 
1056 		for (i = 0; i < filters.nr_addrs; i++) {
1057 			if (addr == filters.addrs[i]) {
1058 				found = true;
1059 				break;
1060 			}
1061 		}
1062 
1063 		if (!found)
1064 			return 0;
1065 	}
1066 
1067 	if (needs_callstack()) {
1068 		u64 *callstack = get_callstack(sample, max_stack_depth);
1069 		if (callstack == NULL)
1070 			return 0;
1071 
1072 		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
1073 			free(callstack);
1074 			return 0;
1075 		}
1076 
1077 		if (ls->callstack == NULL)
1078 			ls->callstack = callstack;
1079 		else
1080 			free(callstack);
1081 	}
1082 
1083 	ts = thread_stat_findnew(sample->tid);
1084 	if (!ts)
1085 		return -ENOMEM;
1086 
1087 	seq = get_seq(ts, addr);
1088 	if (!seq)
1089 		return -ENOMEM;
1090 
1091 	switch (seq->state) {
1092 	case SEQ_STATE_UNINITIALIZED:
1093 	case SEQ_STATE_ACQUIRED:
1094 		break;
1095 	case SEQ_STATE_CONTENDED:
1096 		/*
1097 		 * It can have nested contention begin with mutex spinning,
1098 		 * then we would use the original contention begin event and
1099 		 * ignore the second one.
1100 		 */
1101 		goto end;
1102 	case SEQ_STATE_ACQUIRING:
1103 	case SEQ_STATE_READ_ACQUIRED:
1104 	case SEQ_STATE_RELEASED:
1105 		/* broken lock sequence */
1106 		if (!ls->broken) {
1107 			ls->broken = 1;
1108 			bad_hist[BROKEN_CONTENDED]++;
1109 		}
1110 		list_del_init(&seq->list);
1111 		free(seq);
1112 		goto end;
1113 	default:
1114 		BUG_ON("Unknown state of lock sequence found!\n");
1115 		break;
1116 	}
1117 
1118 	if (seq->state != SEQ_STATE_CONTENDED) {
1119 		seq->state = SEQ_STATE_CONTENDED;
1120 		seq->prev_event_time = sample->time;
1121 		ls->nr_contended++;
1122 	}
1123 end:
1124 	return 0;
1125 }
1126 
1127 static int report_lock_contention_end_event(struct perf_sample *sample)
1128 {
1129 	struct lock_stat *ls;
1130 	struct thread_stat *ts;
1131 	struct lock_seq_stat *seq;
1132 	u64 contended_term;
1133 	u64 addr = perf_sample__intval(sample, "lock_addr");
1134 	u64 key;
1135 	int ret;
1136 
1137 	ret = get_key_by_aggr_mode(&key, addr, sample);
1138 	if (ret < 0)
1139 		return ret;
1140 
1141 	ls = lock_stat_find(key);
1142 	if (!ls)
1143 		return 0;
1144 
1145 	ts = thread_stat_find(sample->tid);
1146 	if (!ts)
1147 		return 0;
1148 
1149 	seq = get_seq(ts, addr);
1150 	if (!seq)
1151 		return -ENOMEM;
1152 
1153 	switch (seq->state) {
1154 	case SEQ_STATE_UNINITIALIZED:
1155 		goto end;
1156 	case SEQ_STATE_CONTENDED:
1157 		contended_term = sample->time - seq->prev_event_time;
1158 		ls->wait_time_total += contended_term;
1159 		if (contended_term < ls->wait_time_min)
1160 			ls->wait_time_min = contended_term;
1161 		if (ls->wait_time_max < contended_term)
1162 			ls->wait_time_max = contended_term;
1163 		break;
1164 	case SEQ_STATE_ACQUIRING:
1165 	case SEQ_STATE_ACQUIRED:
1166 	case SEQ_STATE_READ_ACQUIRED:
1167 	case SEQ_STATE_RELEASED:
1168 		/* broken lock sequence */
1169 		if (!ls->broken) {
1170 			ls->broken = 1;
1171 			bad_hist[BROKEN_ACQUIRED]++;
1172 		}
1173 		list_del_init(&seq->list);
1174 		free(seq);
1175 		goto end;
1176 	default:
1177 		BUG_ON("Unknown state of lock sequence found!\n");
1178 		break;
1179 	}
1180 
1181 	seq->state = SEQ_STATE_ACQUIRED;
1182 	ls->nr_acquired++;
1183 	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1184 end:
1185 	return 0;
1186 }
1187 
1188 /* lock oriented handlers */
1189 /* TODO: handlers for CPU oriented, thread oriented */
1190 static const struct trace_lock_handler report_lock_ops  = {
1191 	.acquire_event		= report_lock_acquire_event,
1192 	.acquired_event		= report_lock_acquired_event,
1193 	.contended_event	= report_lock_contended_event,
1194 	.release_event		= report_lock_release_event,
1195 	.contention_begin_event	= report_lock_contention_begin_event,
1196 	.contention_end_event	= report_lock_contention_end_event,
1197 };
1198 
1199 static const struct trace_lock_handler contention_lock_ops  = {
1200 	.contention_begin_event	= report_lock_contention_begin_event,
1201 	.contention_end_event	= report_lock_contention_end_event,
1202 };
1203 
1204 
1205 static const struct trace_lock_handler *trace_handler;
1206 
1207 static int evsel__process_lock_acquire(struct perf_sample *sample)
1208 {
1209 	if (trace_handler->acquire_event)
1210 		return trace_handler->acquire_event(sample);
1211 	return 0;
1212 }
1213 
1214 static int evsel__process_lock_acquired(struct perf_sample *sample)
1215 {
1216 	if (trace_handler->acquired_event)
1217 		return trace_handler->acquired_event(sample);
1218 	return 0;
1219 }
1220 
1221 static int evsel__process_lock_contended(struct perf_sample *sample)
1222 {
1223 	if (trace_handler->contended_event)
1224 		return trace_handler->contended_event(sample);
1225 	return 0;
1226 }
1227 
1228 static int evsel__process_lock_release(struct perf_sample *sample)
1229 {
1230 	if (trace_handler->release_event)
1231 		return trace_handler->release_event(sample);
1232 	return 0;
1233 }
1234 
1235 static int evsel__process_contention_begin(struct perf_sample *sample)
1236 {
1237 	if (trace_handler->contention_begin_event)
1238 		return trace_handler->contention_begin_event(sample);
1239 	return 0;
1240 }
1241 
1242 static int evsel__process_contention_end(struct perf_sample *sample)
1243 {
1244 	if (trace_handler->contention_end_event)
1245 		return trace_handler->contention_end_event(sample);
1246 	return 0;
1247 }
1248 
1249 static void print_bad_events(int bad, int total)
1250 {
1251 	/* Output for debug, this have to be removed */
1252 	int i;
1253 	int broken = 0;
1254 	const char *name[4] =
1255 		{ "acquire", "acquired", "contended", "release" };
1256 
1257 	for (i = 0; i < BROKEN_MAX; i++)
1258 		broken += bad_hist[i];
1259 
1260 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1261 		return;
1262 
1263 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1264 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1265 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1266 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1267 	for (i = 0; i < BROKEN_MAX; i++)
1268 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1269 }
1270 
1271 /* TODO: various way to print, coloring, nano or milli sec */
1272 static void print_result(void)
1273 {
1274 	struct lock_stat *st;
1275 	struct lock_key *key;
1276 	char cut_name[20];
1277 	int bad, total, printed;
1278 
1279 	if (!quiet) {
1280 		fprintf(lock_output, "%20s ", "Name");
1281 		list_for_each_entry(key, &lock_keys, list)
1282 			fprintf(lock_output, "%*s ", key->len, key->header);
1283 		fprintf(lock_output, "\n\n");
1284 	}
1285 
1286 	bad = total = printed = 0;
1287 	while ((st = pop_from_result())) {
1288 		total++;
1289 		if (st->broken)
1290 			bad++;
1291 		if (!st->nr_acquired)
1292 			continue;
1293 
1294 		bzero(cut_name, 20);
1295 
1296 		if (strlen(st->name) < 20) {
1297 			/* output raw name */
1298 			const char *name = st->name;
1299 
1300 			if (show_thread_stats) {
1301 				struct thread *t;
1302 
1303 				/* st->addr contains tid of thread */
1304 				t = perf_session__findnew(session, st->addr);
1305 				name = thread__comm_str(t);
1306 			}
1307 
1308 			fprintf(lock_output, "%20s ", name);
1309 		} else {
1310 			strncpy(cut_name, st->name, 16);
1311 			cut_name[16] = '.';
1312 			cut_name[17] = '.';
1313 			cut_name[18] = '.';
1314 			cut_name[19] = '\0';
1315 			/* cut off name for saving output style */
1316 			fprintf(lock_output, "%20s ", cut_name);
1317 		}
1318 
1319 		list_for_each_entry(key, &lock_keys, list) {
1320 			key->print(key, st);
1321 			fprintf(lock_output, " ");
1322 		}
1323 		fprintf(lock_output, "\n");
1324 
1325 		if (++printed >= print_nr_entries)
1326 			break;
1327 	}
1328 
1329 	print_bad_events(bad, total);
1330 }
1331 
1332 static bool info_threads, info_map;
1333 
1334 static void dump_threads(void)
1335 {
1336 	struct thread_stat *st;
1337 	struct rb_node *node;
1338 	struct thread *t;
1339 
1340 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1341 
1342 	node = rb_first(&thread_stats);
1343 	while (node) {
1344 		st = container_of(node, struct thread_stat, rb);
1345 		t = perf_session__findnew(session, st->tid);
1346 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1347 		node = rb_next(node);
1348 		thread__put(t);
1349 	}
1350 }
1351 
1352 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1353 {
1354 	int ret;
1355 
1356 	if (a->name && b->name)
1357 		ret = strcmp(a->name, b->name);
1358 	else
1359 		ret = !!a->name - !!b->name;
1360 
1361 	if (!ret)
1362 		return a->addr < b->addr;
1363 	else
1364 		return ret < 0;
1365 }
1366 
1367 static void dump_map(void)
1368 {
1369 	unsigned int i;
1370 	struct lock_stat *st;
1371 
1372 	fprintf(lock_output, "Address of instance: name of class\n");
1373 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1374 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1375 			insert_to_result(st, compare_maps);
1376 		}
1377 	}
1378 
1379 	while ((st = pop_from_result()))
1380 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1381 }
1382 
1383 static void dump_info(void)
1384 {
1385 	if (info_threads)
1386 		dump_threads();
1387 
1388 	if (info_map) {
1389 		if (info_threads)
1390 			fputc('\n', lock_output);
1391 		dump_map();
1392 	}
1393 }
1394 
1395 static const struct evsel_str_handler lock_tracepoints[] = {
1396 	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
1397 	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1398 	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1399 	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
1400 };
1401 
1402 static const struct evsel_str_handler contention_tracepoints[] = {
1403 	{ "lock:contention_begin", evsel__process_contention_begin, },
1404 	{ "lock:contention_end",   evsel__process_contention_end,   },
1405 };
1406 
1407 static int process_event_update(const struct perf_tool *tool,
1408 				union perf_event *event,
1409 				struct evlist **pevlist)
1410 {
1411 	int ret;
1412 
1413 	ret = perf_event__process_event_update(tool, event, pevlist);
1414 	if (ret < 0)
1415 		return ret;
1416 
1417 	/* this can return -EEXIST since we call it for each evsel */
1418 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1419 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1420 	return 0;
1421 }
1422 
1423 typedef int (*tracepoint_handler)(struct perf_sample *sample);
1424 
1425 static int process_sample_event(const struct perf_tool *tool __maybe_unused,
1426 				union perf_event *event,
1427 				struct perf_sample *sample,
1428 				struct machine *machine)
1429 {
1430 	struct evsel *evsel = sample->evsel;
1431 	int err = 0;
1432 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1433 							sample->tid);
1434 
1435 	if (thread == NULL) {
1436 		pr_debug("problem processing %d event, skipping it.\n",
1437 			event->header.type);
1438 		return -1;
1439 	}
1440 
1441 	if (evsel->handler != NULL) {
1442 		tracepoint_handler f = evsel->handler;
1443 		err = f(sample);
1444 	}
1445 
1446 	thread__put(thread);
1447 
1448 	return err;
1449 }
1450 
1451 static void combine_result(void)
1452 {
1453 	unsigned int i;
1454 	struct lock_stat *st;
1455 
1456 	if (!combine_locks)
1457 		return;
1458 
1459 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1460 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1461 			combine_lock_stats(st);
1462 		}
1463 	}
1464 }
1465 
1466 static void sort_result(void)
1467 {
1468 	unsigned int i;
1469 	struct lock_stat *st;
1470 
1471 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1472 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1473 			insert_to_result(st, compare);
1474 		}
1475 	}
1476 }
1477 
1478 static const struct {
1479 	unsigned int flags;
1480 	/*
1481 	 * Name of the lock flags (access), with delimeter ':'.
1482 	 * For example, rwsem:R of rwsem:W.
1483 	 */
1484 	const char *flags_name;
1485 	/* Name of the lock (type), for example, rwlock or rwsem. */
1486 	const char *lock_name;
1487 } lock_type_table[] = {
1488 	{ 0,				"semaphore",	"semaphore" },
1489 	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
1490 	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
1491 	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
1492 	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
1493 	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
1494 	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
1495 	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
1496 	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
1497 	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
1498 	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
1499 	{ LCB_F_MUTEX,			"mutex",	"mutex" },
1500 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
1501 	/* alias for optimistic spinning only */
1502 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
1503 };
1504 
1505 static const char *get_type_flags_name(unsigned int flags)
1506 {
1507 	flags &= LCB_F_TYPE_MASK;
1508 
1509 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1510 		if (lock_type_table[i].flags == flags)
1511 			return lock_type_table[i].flags_name;
1512 	}
1513 	return "unknown";
1514 }
1515 
1516 static const char *get_type_lock_name(unsigned int flags)
1517 {
1518 	flags &= LCB_F_TYPE_MASK;
1519 
1520 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1521 		if (lock_type_table[i].flags == flags)
1522 			return lock_type_table[i].lock_name;
1523 	}
1524 	return "unknown";
1525 }
1526 
1527 static void lock_filter_finish(void)
1528 {
1529 	zfree(&filters.types);
1530 	filters.nr_types = 0;
1531 
1532 	zfree(&filters.addrs);
1533 	filters.nr_addrs = 0;
1534 
1535 	for (int i = 0; i < filters.nr_syms; i++)
1536 		free(filters.syms[i]);
1537 
1538 	zfree(&filters.syms);
1539 	filters.nr_syms = 0;
1540 
1541 	zfree(&filters.cgrps);
1542 	filters.nr_cgrps = 0;
1543 
1544 	for (int i = 0; i < filters.nr_slabs; i++)
1545 		free(filters.slabs[i]);
1546 
1547 	zfree(&filters.slabs);
1548 	filters.nr_slabs = 0;
1549 }
1550 
1551 static void sort_contention_result(void)
1552 {
1553 	sort_result();
1554 }
1555 
1556 static void print_header_stdio(void)
1557 {
1558 	struct lock_key *key;
1559 
1560 	list_for_each_entry(key, &lock_keys, list)
1561 		fprintf(lock_output, "%*s ", key->len, key->header);
1562 
1563 	switch (aggr_mode) {
1564 	case LOCK_AGGR_TASK:
1565 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1566 			show_lock_owner ? "owner" : "comm");
1567 		break;
1568 	case LOCK_AGGR_CALLER:
1569 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1570 		break;
1571 	case LOCK_AGGR_ADDR:
1572 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1573 		break;
1574 	case LOCK_AGGR_CGROUP:
1575 		fprintf(lock_output, "  %s\n\n", "cgroup");
1576 		break;
1577 	default:
1578 		break;
1579 	}
1580 }
1581 
1582 static void print_header_csv(const char *sep)
1583 {
1584 	struct lock_key *key;
1585 
1586 	fprintf(lock_output, "# output: ");
1587 	list_for_each_entry(key, &lock_keys, list)
1588 		fprintf(lock_output, "%s%s ", key->header, sep);
1589 
1590 	switch (aggr_mode) {
1591 	case LOCK_AGGR_TASK:
1592 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1593 			show_lock_owner ? "owner" : "comm");
1594 		break;
1595 	case LOCK_AGGR_CALLER:
1596 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1597 		if (verbose > 0)
1598 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1599 		fprintf(lock_output, "\n");
1600 		break;
1601 	case LOCK_AGGR_ADDR:
1602 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1603 		break;
1604 	case LOCK_AGGR_CGROUP:
1605 		fprintf(lock_output, "%s\n", "cgroup");
1606 		break;
1607 	default:
1608 		break;
1609 	}
1610 }
1611 
1612 static void print_header(void)
1613 {
1614 	if (!quiet) {
1615 		if (symbol_conf.field_sep)
1616 			print_header_csv(symbol_conf.field_sep);
1617 		else
1618 			print_header_stdio();
1619 	}
1620 }
1621 
1622 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1623 {
1624 	struct lock_key *key;
1625 	struct thread *t;
1626 	int pid;
1627 
1628 	list_for_each_entry(key, &lock_keys, list) {
1629 		key->print(key, st);
1630 		fprintf(lock_output, " ");
1631 	}
1632 
1633 	switch (aggr_mode) {
1634 	case LOCK_AGGR_CALLER:
1635 		fprintf(lock_output, "  %10s   %s\n", get_type_flags_name(st->flags), st->name);
1636 		break;
1637 	case LOCK_AGGR_TASK:
1638 		pid = st->addr;
1639 		t = perf_session__findnew(session, pid);
1640 		fprintf(lock_output, "  %10d   %s\n",
1641 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1642 		break;
1643 	case LOCK_AGGR_ADDR:
1644 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1645 			st->name, get_type_lock_name(st->flags));
1646 		break;
1647 	case LOCK_AGGR_CGROUP:
1648 		fprintf(lock_output, "  %s\n", st->name);
1649 		break;
1650 	default:
1651 		break;
1652 	}
1653 
1654 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1655 		struct map *kmap;
1656 		struct symbol *sym;
1657 		char buf[128];
1658 		u64 ip;
1659 
1660 		for (int i = 0; i < max_stack_depth; i++) {
1661 			if (!st->callstack || !st->callstack[i])
1662 				break;
1663 
1664 			ip = st->callstack[i];
1665 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1666 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1667 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1668 		}
1669 	}
1670 }
1671 
1672 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1673 				const char *sep)
1674 {
1675 	struct lock_key *key;
1676 	struct thread *t;
1677 	int pid;
1678 
1679 	list_for_each_entry(key, &lock_keys, list) {
1680 		key->print(key, st);
1681 		fprintf(lock_output, "%s ", sep);
1682 	}
1683 
1684 	switch (aggr_mode) {
1685 	case LOCK_AGGR_CALLER:
1686 		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
1687 		if (verbose <= 0)
1688 			fprintf(lock_output, "\n");
1689 		break;
1690 	case LOCK_AGGR_TASK:
1691 		pid = st->addr;
1692 		t = perf_session__findnew(session, pid);
1693 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1694 			pid == -1 ? "Unknown" : thread__comm_str(t));
1695 		break;
1696 	case LOCK_AGGR_ADDR:
1697 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1698 			st->name, sep, get_type_lock_name(st->flags));
1699 		break;
1700 	case LOCK_AGGR_CGROUP:
1701 		fprintf(lock_output, "%s\n",st->name);
1702 		break;
1703 	default:
1704 		break;
1705 	}
1706 
1707 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1708 		struct map *kmap;
1709 		struct symbol *sym;
1710 		char buf[128];
1711 		u64 ip;
1712 
1713 		for (int i = 0; i < max_stack_depth; i++) {
1714 			if (!st->callstack || !st->callstack[i])
1715 				break;
1716 
1717 			ip = st->callstack[i];
1718 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1719 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1720 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1721 		}
1722 		fprintf(lock_output, "\n");
1723 	}
1724 }
1725 
1726 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1727 {
1728 	if (symbol_conf.field_sep)
1729 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1730 	else
1731 		print_lock_stat_stdio(con, st);
1732 }
1733 
1734 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1735 {
1736 	/* Output for debug, this have to be removed */
1737 	int broken = fails->task + fails->stack + fails->time + fails->data;
1738 
1739 	if (!use_bpf)
1740 		print_bad_events(bad, total);
1741 
1742 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1743 		return;
1744 
1745 	total += broken;
1746 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1747 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1748 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1749 
1750 	fprintf(lock_output, "histogram of failure reasons\n");
1751 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1752 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1753 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1754 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1755 }
1756 
1757 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1758 			     const char *sep)
1759 {
1760 	/* Output for debug, this have to be removed */
1761 	if (use_bpf)
1762 		bad = fails->task + fails->stack + fails->time + fails->data;
1763 
1764 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1765 		return;
1766 
1767 	total += bad;
1768 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1769 
1770 	if (use_bpf) {
1771 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1772 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1773 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1774 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1775 	} else {
1776 		int i;
1777 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1778 
1779 		for (i = 0; i < BROKEN_MAX; i++)
1780 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1781 	}
1782 	fprintf(lock_output, "\n");
1783 }
1784 
1785 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1786 {
1787 	if (symbol_conf.field_sep)
1788 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1789 	else
1790 		print_footer_stdio(total, bad, fails);
1791 }
1792 
1793 static void print_contention_result(struct lock_contention *con)
1794 {
1795 	struct lock_stat *st;
1796 	int bad, total, printed;
1797 
1798 	if (!quiet)
1799 		print_header();
1800 
1801 	bad = total = printed = 0;
1802 
1803 	while ((st = pop_from_result())) {
1804 		total += use_bpf ? st->nr_contended : 1;
1805 		if (st->broken)
1806 			bad++;
1807 
1808 		if (!st->wait_time_total)
1809 			continue;
1810 
1811 		print_lock_stat(con, st);
1812 
1813 		if (++printed >= print_nr_entries)
1814 			break;
1815 	}
1816 
1817 	if (con->owner && con->save_callstack && verbose > 0) {
1818 		struct rb_root root = RB_ROOT;
1819 
1820 		if (symbol_conf.field_sep)
1821 			fprintf(lock_output, "# owner stack trace:\n");
1822 		else
1823 			fprintf(lock_output, "\n=== owner stack trace ===\n\n");
1824 		while ((st = pop_owner_stack_trace(con)))
1825 			insert_to(&root, st, compare);
1826 
1827 		while ((st = pop_from(&root))) {
1828 			print_lock_stat(con, st);
1829 			free(st);
1830 		}
1831 	}
1832 
1833 	if (print_nr_entries) {
1834 		/* update the total/bad stats */
1835 		while ((st = pop_from_result())) {
1836 			total += use_bpf ? st->nr_contended : 1;
1837 			if (st->broken)
1838 				bad++;
1839 		}
1840 	}
1841 	/* some entries are collected but hidden by the callstack filter */
1842 	total += con->nr_filtered;
1843 
1844 	print_footer(total, bad, &con->fails);
1845 }
1846 
1847 static bool force;
1848 
1849 static int __cmd_report(bool display_info)
1850 {
1851 	int err = -EINVAL;
1852 	struct perf_tool eops;
1853 	struct perf_data data = {
1854 		.path  = input_name,
1855 		.mode  = PERF_DATA_MODE_READ,
1856 		.force = force,
1857 	};
1858 
1859 	perf_tool__init(&eops, /*ordered_events=*/true);
1860 	eops.attr		 = perf_event__process_attr;
1861 	eops.event_update	 = process_event_update;
1862 	eops.sample		 = process_sample_event;
1863 	eops.comm		 = perf_event__process_comm;
1864 	eops.mmap		 = perf_event__process_mmap;
1865 	eops.mmap2		 = perf_event__process_mmap2;
1866 	eops.namespaces		 = perf_event__process_namespaces;
1867 	eops.tracing_data	 = perf_event__process_tracing_data;
1868 	session = perf_session__new(&data, &eops);
1869 	if (IS_ERR(session)) {
1870 		pr_err("Initializing perf session failed\n");
1871 		return PTR_ERR(session);
1872 	}
1873 
1874 	symbol_conf.allow_aliases = true;
1875 	symbol__init(perf_session__env(session));
1876 
1877 	if (!data.is_pipe) {
1878 		if (!perf_session__has_traces(session, "lock record"))
1879 			goto out_delete;
1880 
1881 		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1882 			pr_err("Initializing perf session tracepoint handlers failed\n");
1883 			goto out_delete;
1884 		}
1885 
1886 		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1887 			pr_err("Initializing perf session tracepoint handlers failed\n");
1888 			goto out_delete;
1889 		}
1890 	}
1891 
1892 	if (setup_output_field(false, output_fields))
1893 		goto out_delete;
1894 
1895 	if (select_key(false))
1896 		goto out_delete;
1897 
1898 	if (show_thread_stats)
1899 		aggr_mode = LOCK_AGGR_TASK;
1900 
1901 	err = perf_session__process_events(session);
1902 	if (err)
1903 		goto out_delete;
1904 
1905 	setup_pager();
1906 	if (display_info) /* used for info subcommand */
1907 		dump_info();
1908 	else {
1909 		combine_result();
1910 		sort_result();
1911 		print_result();
1912 	}
1913 
1914 out_delete:
1915 	perf_session__delete(session);
1916 	return err;
1917 }
1918 
1919 static volatile sig_atomic_t done;
1920 
1921 static void sighandler(int sig __maybe_unused)
1922 {
1923 	done = 1;
1924 }
1925 
1926 static int check_lock_contention_options(const struct option *options,
1927 					 const char * const *usage)
1928 
1929 {
1930 	if (show_thread_stats && show_lock_addrs) {
1931 		pr_err("Cannot use thread and addr mode together\n");
1932 		parse_options_usage(usage, options, "threads", 0);
1933 		parse_options_usage(NULL, options, "lock-addr", 0);
1934 		return -1;
1935 	}
1936 
1937 	if (show_lock_owner && !use_bpf) {
1938 		pr_err("Lock owners are available only with BPF\n");
1939 		parse_options_usage(usage, options, "lock-owner", 0);
1940 		parse_options_usage(NULL, options, "use-bpf", 0);
1941 		return -1;
1942 	}
1943 
1944 	if (show_lock_owner && show_lock_addrs) {
1945 		pr_err("Cannot use owner and addr mode together\n");
1946 		parse_options_usage(usage, options, "lock-owner", 0);
1947 		parse_options_usage(NULL, options, "lock-addr", 0);
1948 		return -1;
1949 	}
1950 
1951 	if (show_lock_cgroups && !use_bpf) {
1952 		pr_err("Cgroups are available only with BPF\n");
1953 		parse_options_usage(usage, options, "lock-cgroup", 0);
1954 		parse_options_usage(NULL, options, "use-bpf", 0);
1955 		return -1;
1956 	}
1957 
1958 	if (show_lock_cgroups && show_lock_addrs) {
1959 		pr_err("Cannot use cgroup and addr mode together\n");
1960 		parse_options_usage(usage, options, "lock-cgroup", 0);
1961 		parse_options_usage(NULL, options, "lock-addr", 0);
1962 		return -1;
1963 	}
1964 
1965 	if (show_lock_cgroups && show_thread_stats) {
1966 		pr_err("Cannot use cgroup and thread mode together\n");
1967 		parse_options_usage(usage, options, "lock-cgroup", 0);
1968 		parse_options_usage(NULL, options, "threads", 0);
1969 		return -1;
1970 	}
1971 
1972 	if (symbol_conf.field_sep) {
1973 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1974 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1975 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1976 			pr_err("Cannot use the separator that is already used\n");
1977 			parse_options_usage(usage, options, "x", 1);
1978 			return -1;
1979 		}
1980 	}
1981 
1982 	if (show_lock_owner && !show_thread_stats) {
1983 		pr_warning("Now -o try to show owner's callstack instead of pid and comm.\n");
1984 		pr_warning("Please use -t option too to keep the old behavior.\n");
1985 	}
1986 
1987 	return 0;
1988 }
1989 
1990 static int __cmd_contention(int argc, const char **argv)
1991 {
1992 	int err = -EINVAL;
1993 	struct perf_tool eops;
1994 	struct perf_data data = {
1995 		.path  = input_name,
1996 		.mode  = PERF_DATA_MODE_READ,
1997 		.force = force,
1998 	};
1999 	struct lock_contention con = {
2000 		.target = &target,
2001 		.map_nr_entries = bpf_map_entries,
2002 		.max_stack = max_stack_depth,
2003 		.stack_skip = stack_skip,
2004 		.filters = &filters,
2005 		.delays = delays,
2006 		.nr_delays = nr_delays,
2007 		.save_callstack = needs_callstack(),
2008 		.owner = show_lock_owner,
2009 		.cgroups = RB_ROOT,
2010 	};
2011 	struct perf_env host_env;
2012 
2013 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2014 	if (!lockhash_table)
2015 		return -ENOMEM;
2016 
2017 	con.result = &lockhash_table[0];
2018 
2019 	perf_tool__init(&eops, /*ordered_events=*/true);
2020 	eops.attr		 = perf_event__process_attr;
2021 	eops.event_update	 = process_event_update;
2022 	eops.sample		 = process_sample_event;
2023 	eops.comm		 = perf_event__process_comm;
2024 	eops.mmap		 = perf_event__process_mmap;
2025 	eops.mmap2		 = perf_event__process_mmap2;
2026 	eops.tracing_data	 = perf_event__process_tracing_data;
2027 
2028 	perf_env__init(&host_env);
2029 	session = __perf_session__new(use_bpf ? NULL : &data, &eops,
2030 				/*trace_event_repipe=*/false, &host_env);
2031 
2032 	if (IS_ERR(session)) {
2033 		pr_err("Initializing perf session failed\n");
2034 		err = PTR_ERR(session);
2035 		session = NULL;
2036 		goto out_delete;
2037 	}
2038 
2039 	con.machine = &session->machines.host;
2040 
2041 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2042 		show_lock_addrs ? LOCK_AGGR_ADDR :
2043 		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2044 
2045 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2046 		con.save_callstack = true;
2047 
2048 	symbol_conf.allow_aliases = true;
2049 	symbol__init(perf_session__env(session));
2050 
2051 	if (use_bpf) {
2052 		err = target__validate(&target);
2053 		if (err) {
2054 			char errbuf[512];
2055 
2056 			target__strerror(&target, err, errbuf, 512);
2057 			pr_err("%s\n", errbuf);
2058 			goto out_delete;
2059 		}
2060 
2061 		done = 0;
2062 		signal(SIGINT, sighandler);
2063 		signal(SIGCHLD, sighandler);
2064 		signal(SIGTERM, sighandler);
2065 
2066 		con.evlist = evlist__new();
2067 		if (con.evlist == NULL) {
2068 			err = -ENOMEM;
2069 			goto out_delete;
2070 		}
2071 
2072 		err = evlist__create_maps(con.evlist, &target);
2073 		if (err < 0)
2074 			goto out_delete;
2075 
2076 		if (argc) {
2077 			err = evlist__prepare_workload(con.evlist, &target,
2078 						       argv, false, NULL);
2079 			if (err < 0)
2080 				goto out_delete;
2081 		}
2082 
2083 		err = lock_contention_prepare(&con);
2084 		if (err < 0) {
2085 			pr_err("lock contention BPF setup failed\n");
2086 			goto out_delete;
2087 		}
2088 	} else if (!data.is_pipe) {
2089 		if (!perf_session__has_traces(session, "lock record"))
2090 			goto out_delete;
2091 
2092 		if (!evlist__find_evsel_by_str(session->evlist,
2093 					       "lock:contention_begin")) {
2094 			pr_err("lock contention evsel not found\n");
2095 			goto out_delete;
2096 		}
2097 
2098 		if (perf_session__set_tracepoints_handlers(session,
2099 						contention_tracepoints)) {
2100 			pr_err("Initializing perf session tracepoint handlers failed\n");
2101 			goto out_delete;
2102 		}
2103 	}
2104 
2105 	err = setup_output_field(true, output_fields);
2106 	if (err) {
2107 		pr_err("Failed to setup output field\n");
2108 		goto out_delete;
2109 	}
2110 
2111 	err = select_key(true);
2112 	if (err)
2113 		goto out_delete;
2114 
2115 	if (symbol_conf.field_sep) {
2116 		int i;
2117 		struct lock_key *keys = contention_keys;
2118 
2119 		/* do not align output in CSV format */
2120 		for (i = 0; keys[i].name; i++)
2121 			keys[i].len = 0;
2122 	}
2123 
2124 	if (use_bpf) {
2125 		lock_contention_start();
2126 		if (argc)
2127 			evlist__start_workload(con.evlist);
2128 
2129 		while (!done) {
2130 			if (argc && waitpid(con.evlist->workload.pid, NULL, WNOHANG) > 0)
2131 				break;
2132 			sleep(1);
2133 		}
2134 
2135 		lock_contention_stop();
2136 		lock_contention_read(&con);
2137 	} else {
2138 		err = perf_session__process_events(session);
2139 		if (err)
2140 			goto out_delete;
2141 	}
2142 
2143 	setup_pager();
2144 
2145 	sort_contention_result();
2146 	print_contention_result(&con);
2147 
2148 out_delete:
2149 	lock_filter_finish();
2150 	evlist__delete(con.evlist);
2151 	lock_contention_finish(&con);
2152 	perf_session__delete(session);
2153 	perf_env__exit(&host_env);
2154 	zfree(&lockhash_table);
2155 	return err;
2156 }
2157 
2158 
2159 static int __cmd_record(int argc, const char **argv)
2160 {
2161 	const char *record_args[] = {
2162 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2163 	};
2164 	const char *callgraph_args[] = {
2165 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2166 	};
2167 	unsigned int rec_argc, i, j, ret;
2168 	unsigned int nr_tracepoints;
2169 	unsigned int nr_callgraph_args = 0;
2170 	const char **rec_argv;
2171 	bool has_lock_stat = true;
2172 
2173 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2174 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2175 			pr_debug("tracepoint %s is not enabled. "
2176 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2177 				 lock_tracepoints[i].name);
2178 			has_lock_stat = false;
2179 			break;
2180 		}
2181 	}
2182 
2183 	if (has_lock_stat)
2184 		goto setup_args;
2185 
2186 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2187 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2188 			pr_err("tracepoint %s is not enabled.\n",
2189 			       contention_tracepoints[i].name);
2190 			return 1;
2191 		}
2192 	}
2193 
2194 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2195 
2196 setup_args:
2197 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2198 
2199 	if (has_lock_stat)
2200 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2201 	else
2202 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2203 
2204 	/* factor of 2 is for -e in front of each tracepoint */
2205 	rec_argc += 2 * nr_tracepoints;
2206 
2207 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2208 	if (!rec_argv)
2209 		return -ENOMEM;
2210 
2211 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2212 		rec_argv[i] = record_args[i];
2213 
2214 	for (j = 0; j < nr_tracepoints; j++) {
2215 		rec_argv[i++] = "-e";
2216 		rec_argv[i++] = has_lock_stat
2217 			? lock_tracepoints[j].name
2218 			: contention_tracepoints[j].name;
2219 	}
2220 
2221 	for (j = 0; j < nr_callgraph_args; j++, i++)
2222 		rec_argv[i] = callgraph_args[j];
2223 
2224 	for (j = 1; j < (unsigned int)argc; j++, i++)
2225 		rec_argv[i] = argv[j];
2226 
2227 	BUG_ON(i != rec_argc);
2228 
2229 	ret = cmd_record(i, rec_argv);
2230 	free(rec_argv);
2231 	return ret;
2232 }
2233 
2234 static int parse_map_entry(const struct option *opt, const char *str,
2235 			    int unset __maybe_unused)
2236 {
2237 	unsigned long *len = (unsigned long *)opt->value;
2238 	unsigned long val;
2239 	char *endptr;
2240 
2241 	errno = 0;
2242 	val = strtoul(str, &endptr, 0);
2243 	if (*endptr != '\0' || errno != 0) {
2244 		pr_err("invalid BPF map length: %s\n", str);
2245 		return -1;
2246 	}
2247 
2248 	*len = val;
2249 	return 0;
2250 }
2251 
2252 static int parse_max_stack(const struct option *opt, const char *str,
2253 			   int unset __maybe_unused)
2254 {
2255 	int *len = opt->value;
2256 	long val;
2257 	char *endptr;
2258 
2259 	errno = 0;
2260 	val = strtol(str, &endptr, 0);
2261 	if (*endptr != '\0' || errno != 0) {
2262 		pr_err("invalid max stack depth: %s\n", str);
2263 		return -1;
2264 	}
2265 
2266 	if (val < 0 || val > sysctl__max_stack()) {
2267 		pr_err("invalid max stack depth: %ld\n", val);
2268 		return -1;
2269 	}
2270 
2271 	*len = val;
2272 	return 0;
2273 }
2274 
2275 static bool add_lock_type(unsigned int flags)
2276 {
2277 	unsigned int *tmp;
2278 
2279 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2280 	if (tmp == NULL)
2281 		return false;
2282 
2283 	tmp[filters.nr_types++] = flags;
2284 	filters.types = tmp;
2285 	return true;
2286 }
2287 
2288 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2289 			   int unset __maybe_unused)
2290 {
2291 	char *s, *tmp, *tok;
2292 
2293 	s = strdup(str);
2294 	if (s == NULL)
2295 		return -1;
2296 
2297 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2298 		bool found = false;
2299 
2300 		/* `tok` is a flags name if it contains ':'. */
2301 		if (strchr(tok, ':')) {
2302 			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2303 				if (!strcmp(lock_type_table[i].flags_name, tok) &&
2304 				    add_lock_type(lock_type_table[i].flags)) {
2305 					found = true;
2306 					break;
2307 				}
2308 			}
2309 
2310 			if (!found) {
2311 				pr_err("Unknown lock flags name: %s\n", tok);
2312 				free(s);
2313 				return -1;
2314 			}
2315 
2316 			continue;
2317 		}
2318 
2319 		/*
2320 		 * Otherwise `tok` is a lock name.
2321 		 * Single lock name could contain multiple flags.
2322 		 * Replace alias `pcpu-sem` with actual name `percpu-rwsem.
2323 		 */
2324 		if (!strcmp(tok, "pcpu-sem"))
2325 			tok = (char *)"percpu-rwsem";
2326 		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2327 			if (!strcmp(lock_type_table[i].lock_name, tok)) {
2328 				if (add_lock_type(lock_type_table[i].flags)) {
2329 					found = true;
2330 				} else {
2331 					free(s);
2332 					return -1;
2333 				}
2334 			}
2335 		}
2336 
2337 		if (!found) {
2338 			pr_err("Unknown lock name: %s\n", tok);
2339 			free(s);
2340 			return -1;
2341 		}
2342 
2343 	}
2344 
2345 	free(s);
2346 	return 0;
2347 }
2348 
2349 static bool add_lock_addr(unsigned long addr)
2350 {
2351 	unsigned long *tmp;
2352 
2353 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2354 	if (tmp == NULL) {
2355 		pr_err("Memory allocation failure\n");
2356 		return false;
2357 	}
2358 
2359 	tmp[filters.nr_addrs++] = addr;
2360 	filters.addrs = tmp;
2361 	return true;
2362 }
2363 
2364 static bool add_lock_sym(char *name)
2365 {
2366 	char **tmp;
2367 	char *sym = strdup(name);
2368 
2369 	if (sym == NULL) {
2370 		pr_err("Memory allocation failure\n");
2371 		return false;
2372 	}
2373 
2374 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2375 	if (tmp == NULL) {
2376 		pr_err("Memory allocation failure\n");
2377 		free(sym);
2378 		return false;
2379 	}
2380 
2381 	tmp[filters.nr_syms++] = sym;
2382 	filters.syms = tmp;
2383 	return true;
2384 }
2385 
2386 static bool add_lock_slab(char *name)
2387 {
2388 	char **tmp;
2389 	char *sym = strdup(name);
2390 
2391 	if (sym == NULL) {
2392 		pr_err("Memory allocation failure\n");
2393 		return false;
2394 	}
2395 
2396 	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
2397 	if (tmp == NULL) {
2398 		pr_err("Memory allocation failure\n");
2399 		return false;
2400 	}
2401 
2402 	tmp[filters.nr_slabs++] = sym;
2403 	filters.slabs = tmp;
2404 	return true;
2405 }
2406 
2407 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2408 			   int unset __maybe_unused)
2409 {
2410 	char *s, *tmp, *tok;
2411 	int ret = 0;
2412 	u64 addr;
2413 
2414 	s = strdup(str);
2415 	if (s == NULL)
2416 		return -1;
2417 
2418 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2419 		char *end;
2420 
2421 		addr = strtoul(tok, &end, 16);
2422 		if (*end == '\0') {
2423 			if (!add_lock_addr(addr)) {
2424 				ret = -1;
2425 				break;
2426 			}
2427 			continue;
2428 		}
2429 
2430 		if (*tok == '&') {
2431 			if (!add_lock_slab(tok + 1)) {
2432 				ret = -1;
2433 				break;
2434 			}
2435 			continue;
2436 		}
2437 
2438 		/*
2439 		 * At this moment, we don't have kernel symbols.  Save the symbols
2440 		 * in a separate list and resolve them to addresses later.
2441 		 */
2442 		if (!add_lock_sym(tok)) {
2443 			ret = -1;
2444 			break;
2445 		}
2446 	}
2447 
2448 	free(s);
2449 	return ret;
2450 }
2451 
2452 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2453 			int unset __maybe_unused)
2454 {
2455 	const char **name = (const char **)opt->value;
2456 
2457 	if (str == NULL)
2458 		return -1;
2459 
2460 	lock_output = fopen(str, "w");
2461 	if (lock_output == NULL) {
2462 		pr_err("Cannot open %s\n", str);
2463 		return -1;
2464 	}
2465 
2466 	*name = str;
2467 	return 0;
2468 }
2469 
2470 static bool add_lock_cgroup(char *name)
2471 {
2472 	u64 *tmp;
2473 	struct cgroup *cgrp;
2474 
2475 	cgrp = cgroup__new(name, /*do_open=*/false);
2476 	if (cgrp == NULL) {
2477 		pr_err("Failed to create cgroup: %s\n", name);
2478 		return false;
2479 	}
2480 
2481 	if (read_cgroup_id(cgrp) < 0) {
2482 		pr_err("Failed to read cgroup id for %s\n", name);
2483 		cgroup__put(cgrp);
2484 		return false;
2485 	}
2486 
2487 	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
2488 	if (tmp == NULL) {
2489 		pr_err("Memory allocation failure\n");
2490 		return false;
2491 	}
2492 
2493 	tmp[filters.nr_cgrps++] = cgrp->id;
2494 	filters.cgrps = tmp;
2495 	cgroup__put(cgrp);
2496 	return true;
2497 }
2498 
2499 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
2500 			       int unset __maybe_unused)
2501 {
2502 	char *s, *tmp, *tok;
2503 	int ret = 0;
2504 
2505 	s = strdup(str);
2506 	if (s == NULL)
2507 		return -1;
2508 
2509 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2510 		if (!add_lock_cgroup(tok)) {
2511 			ret = -1;
2512 			break;
2513 		}
2514 	}
2515 
2516 	free(s);
2517 	return ret;
2518 }
2519 
2520 static bool add_lock_delay(char *spec)
2521 {
2522 	char *at, *pos;
2523 	struct lock_delay *tmp;
2524 	unsigned long duration;
2525 
2526 	at = strchr(spec, '@');
2527 	if (at == NULL) {
2528 		pr_err("lock delay should have '@' sign: %s\n", spec);
2529 		return false;
2530 	}
2531 	if (at == spec) {
2532 		pr_err("lock delay should have time before '@': %s\n", spec);
2533 		return false;
2534 	}
2535 
2536 	*at = '\0';
2537 	duration = strtoul(spec, &pos, 0);
2538 	if (!strcmp(pos, "ns"))
2539 		duration *= 1;
2540 	else if (!strcmp(pos, "us"))
2541 		duration *= 1000;
2542 	else if (!strcmp(pos, "ms"))
2543 		duration *= 1000 * 1000;
2544 	else if (*pos) {
2545 		pr_err("invalid delay time: %s@%s\n", spec, at + 1);
2546 		return false;
2547 	}
2548 
2549 	if (duration > 10 * 1000 * 1000) {
2550 		pr_err("lock delay is too long: %s (> 10ms)\n", spec);
2551 		return false;
2552 	}
2553 
2554 	tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays));
2555 	if (tmp == NULL) {
2556 		pr_err("Memory allocation failure\n");
2557 		return false;
2558 	}
2559 	delays = tmp;
2560 
2561 	delays[nr_delays].sym = strdup(at + 1);
2562 	if (delays[nr_delays].sym == NULL) {
2563 		pr_err("Memory allocation failure\n");
2564 		return false;
2565 	}
2566 	delays[nr_delays].time = duration;
2567 
2568 	nr_delays++;
2569 	return true;
2570 }
2571 
2572 static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str,
2573 			    int unset __maybe_unused)
2574 {
2575 	char *s, *tmp, *tok;
2576 	int ret = 0;
2577 
2578 	s = strdup(str);
2579 	if (s == NULL)
2580 		return -1;
2581 
2582 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2583 		if (!add_lock_delay(tok)) {
2584 			ret = -1;
2585 			break;
2586 		}
2587 	}
2588 
2589 	free(s);
2590 	return ret;
2591 }
2592 
2593 int cmd_lock(int argc, const char **argv)
2594 {
2595 	const struct option lock_options[] = {
2596 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2597 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2598 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2599 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2600 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2601 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2602 		   "file", "vmlinux pathname"),
2603 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2604 		   "file", "kallsyms pathname"),
2605 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2606 	OPT_END()
2607 	};
2608 
2609 	const struct option info_options[] = {
2610 	OPT_BOOLEAN('t', "threads", &info_threads,
2611 		    "dump the thread list in perf.data"),
2612 	OPT_BOOLEAN('m', "map", &info_map,
2613 		    "dump the map of lock instances (address:name table)"),
2614 	OPT_PARENT(lock_options)
2615 	};
2616 
2617 	const struct option report_options[] = {
2618 	OPT_STRING('k', "key", &sort_key, "acquired",
2619 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2620 	OPT_STRING('F', "field", &output_fields, NULL,
2621 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2622 	/* TODO: type */
2623 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2624 		    "combine locks in the same class"),
2625 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2626 		    "show per-thread lock stats"),
2627 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2628 	OPT_PARENT(lock_options)
2629 	};
2630 
2631 	struct option contention_options[] = {
2632 	OPT_STRING('k', "key", &sort_key, "wait_total",
2633 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2634 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2635 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2636 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2637 		    "show per-thread lock stats"),
2638 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2639 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2640 		    "System-wide collection from all CPUs"),
2641 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2642 		    "List of cpus to monitor"),
2643 	OPT_STRING('p', "pid", &target.pid, "pid",
2644 		   "Trace on existing process id"),
2645 	OPT_STRING(0, "tid", &target.tid, "tid",
2646 		   "Trace on existing thread id (exclusive to --pid)"),
2647 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2648 		     "Max number of BPF map entries", parse_map_entry),
2649 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2650 		     "Set the maximum stack depth when collecting lock contention, "
2651 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2652 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2653 		    "Set the number of stack depth to skip when finding a lock caller, "
2654 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2655 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2656 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2657 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2658 		     "Filter specific type of locks", parse_lock_type),
2659 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2660 		     "Filter specific address/symbol of locks", parse_lock_addr),
2661 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2662 		     "Filter specific function in the callstack", parse_call_stack),
2663 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2664 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2665 		   "print result in CSV format with custom separator"),
2666 	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
2667 	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
2668 		     "Filter specific cgroups", parse_cgroup_filter),
2669 	OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC",
2670 		     "Inject delays to specific locks", parse_lock_delay),
2671 	OPT_PARENT(lock_options)
2672 	};
2673 
2674 	const char * const info_usage[] = {
2675 		"perf lock info [<options>]",
2676 		NULL
2677 	};
2678 	const char *const lock_subcommands[] = { "record", "report", "script",
2679 						 "info", "contention", NULL };
2680 	const char *lock_usage[] = {
2681 		NULL,
2682 		NULL
2683 	};
2684 	const char * const report_usage[] = {
2685 		"perf lock report [<options>]",
2686 		NULL
2687 	};
2688 	const char * const contention_usage[] = {
2689 		"perf lock contention [<options>]",
2690 		NULL
2691 	};
2692 	unsigned int i;
2693 	int rc = 0;
2694 
2695 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2696 	if (!lockhash_table)
2697 		return -ENOMEM;
2698 
2699 	for (i = 0; i < LOCKHASH_SIZE; i++)
2700 		INIT_HLIST_HEAD(lockhash_table + i);
2701 
2702 	lock_output = stderr;
2703 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2704 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2705 	if (!argc)
2706 		usage_with_options(lock_usage, lock_options);
2707 
2708 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2709 		return __cmd_record(argc, argv);
2710 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2711 		trace_handler = &report_lock_ops;
2712 		if (argc) {
2713 			argc = parse_options(argc, argv,
2714 					     report_options, report_usage, 0);
2715 			if (argc)
2716 				usage_with_options(report_usage, report_options);
2717 		}
2718 		rc = __cmd_report(false);
2719 	} else if (!strcmp(argv[0], "script")) {
2720 		/* Aliased to 'perf script' */
2721 		rc = cmd_script(argc, argv);
2722 	} else if (!strcmp(argv[0], "info")) {
2723 		if (argc) {
2724 			argc = parse_options(argc, argv,
2725 					     info_options, info_usage, 0);
2726 			if (argc)
2727 				usage_with_options(info_usage, info_options);
2728 		}
2729 
2730 		/* If neither threads nor map requested, display both */
2731 		if (!info_threads && !info_map) {
2732 			info_threads = true;
2733 			info_map = true;
2734 		}
2735 
2736 		/* recycling report_lock_ops */
2737 		trace_handler = &report_lock_ops;
2738 		rc = __cmd_report(true);
2739 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2740 		trace_handler = &contention_lock_ops;
2741 		sort_key = "wait_total";
2742 		output_fields = "contended,wait_total,wait_max,avg_wait";
2743 
2744 #ifndef HAVE_BPF_SKEL
2745 		set_option_nobuild(contention_options, 'b', "use-bpf",
2746 				   "no BUILD_BPF_SKEL=1", false);
2747 #endif
2748 		if (argc) {
2749 			argc = parse_options(argc, argv, contention_options,
2750 					     contention_usage, 0);
2751 		}
2752 
2753 		if (check_lock_contention_options(contention_options,
2754 						  contention_usage) < 0)
2755 			return -1;
2756 
2757 		rc = __cmd_contention(argc, argv);
2758 	} else {
2759 		usage_with_options(lock_usage, lock_options);
2760 	}
2761 
2762 	/* free usage string allocated by parse_options_subcommand */
2763 	free((void *)lock_usage[0]);
2764 
2765 	zfree(&lockhash_table);
2766 	return rc;
2767 }
2768