xref: /linux/tools/perf/builtin-lock.c (revision 02f2d58f235ddcf8bc33c084cff84fd685a1be11)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/cgroup.h"
14 #include "util/callchain.h"
15 #include "util/lock-contention.h"
16 #include "util/bpf_skel/lock_data.h"
17 
18 #include <subcmd/pager.h>
19 #include <subcmd/parse-options.h>
20 #include "util/trace-event.h"
21 #include "util/tracepoint.h"
22 
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/data.h"
27 #include "util/string2.h"
28 #include "util/map.h"
29 #include "util/util.h"
30 
31 #include <stdio.h>
32 #include <sys/types.h>
33 #include <sys/prctl.h>
34 #include <semaphore.h>
35 #include <math.h>
36 #include <limits.h>
37 #include <ctype.h>
38 
39 #include <linux/list.h>
40 #include <linux/hash.h>
41 #include <linux/kernel.h>
42 #include <linux/zalloc.h>
43 #include <linux/err.h>
44 #include <linux/stringify.h>
45 
/* Session whose host machine is used when resolving callchains and symbols. */
static struct perf_session *session;
static struct target target;

/* Root of the rb-tree of struct thread_stat nodes, ordered by tid. */
static struct rb_root		thread_stats;

/* When set, insert_to_result() drops stats already merged by combine_lock_stats(). */
static bool combine_locks;
/* When set, lock_contention_caller() does not resolve a caller name. */
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
/* Depth limit passed to thread__resolve_callchain() and get_callstack(). */
static int max_stack_depth = CONTENTION_STACK_DEPTH;
/* Number of leading callchain entries ignored while walking a callchain. */
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
/* Stream the print helpers in this file write to. */
static FILE *lock_output;

/* Type/address/symbol filters consulted by the contention-begin handler. */
static struct lock_filter filters;

/* Selects what the handlers use as the lock_stat key (address, task or caller). */
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
67 
68 static struct thread_stat *thread_stat_find(u32 tid)
69 {
70 	struct rb_node *node;
71 	struct thread_stat *st;
72 
73 	node = thread_stats.rb_node;
74 	while (node) {
75 		st = container_of(node, struct thread_stat, rb);
76 		if (st->tid == tid)
77 			return st;
78 		else if (tid < st->tid)
79 			node = node->rb_left;
80 		else
81 			node = node->rb_right;
82 	}
83 
84 	return NULL;
85 }
86 
87 static void thread_stat_insert(struct thread_stat *new)
88 {
89 	struct rb_node **rb = &thread_stats.rb_node;
90 	struct rb_node *parent = NULL;
91 	struct thread_stat *p;
92 
93 	while (*rb) {
94 		p = container_of(*rb, struct thread_stat, rb);
95 		parent = *rb;
96 
97 		if (new->tid < p->tid)
98 			rb = &(*rb)->rb_left;
99 		else if (new->tid > p->tid)
100 			rb = &(*rb)->rb_right;
101 		else
102 			BUG_ON("inserting invalid thread_stat\n");
103 	}
104 
105 	rb_link_node(&new->rb, parent, rb);
106 	rb_insert_color(&new->rb, &thread_stats);
107 }
108 
109 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
110 {
111 	struct thread_stat *st;
112 
113 	st = thread_stat_find(tid);
114 	if (st)
115 		return st;
116 
117 	st = zalloc(sizeof(struct thread_stat));
118 	if (!st) {
119 		pr_err("memory allocation failed\n");
120 		return NULL;
121 	}
122 
123 	st->tid = tid;
124 	INIT_LIST_HEAD(&st->seq_list);
125 
126 	thread_stat_insert(st);
127 
128 	return st;
129 }
130 
131 static struct thread_stat *thread_stat_findnew_first(u32 tid);
132 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
133 	thread_stat_findnew_first;
134 
135 static struct thread_stat *thread_stat_findnew_first(u32 tid)
136 {
137 	struct thread_stat *st;
138 
139 	st = zalloc(sizeof(struct thread_stat));
140 	if (!st) {
141 		pr_err("memory allocation failed\n");
142 		return NULL;
143 	}
144 	st->tid = tid;
145 	INIT_LIST_HEAD(&st->seq_list);
146 
147 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
148 	rb_insert_color(&st->rb, &thread_stats);
149 
150 	thread_stat_findnew = thread_stat_findnew_after_first;
151 	return st;
152 }
153 
/*
 * Build a simple sort-key comparator named lock_stat_key_<member>.
 * The generated function returns non-zero when @one's member is bigger
 * than @two's, and zero otherwise (including when they are equal).
 */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

/* Comparators referenced by the DEF_KEY_LOCK() tables in this file. */
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
167 
168 static int lock_stat_key_wait_time_min(struct lock_stat *one,
169 					struct lock_stat *two)
170 {
171 	u64 s1 = one->wait_time_min;
172 	u64 s2 = two->wait_time_min;
173 	if (s1 == ULLONG_MAX)
174 		s1 = 0;
175 	if (s2 == ULLONG_MAX)
176 		s2 = 0;
177 	return s1 > s2;
178 }
179 
/* One sortable/printable column of the lock statistics output. */
struct lock_key {
	/*
	 * name: the value specified by the user.
	 * This should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to the function that compares two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to the function that prints this field of a lock stat */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry used to link this key into the lock_keys list */
	struct list_head	list;
};
198 
199 static void lock_stat_key_print_time(unsigned long long nsec, int len)
200 {
201 	static const struct {
202 		float base;
203 		const char *unit;
204 	} table[] = {
205 		{ 1e9 * 3600, "h " },
206 		{ 1e9 * 60, "m " },
207 		{ 1e9, "s " },
208 		{ 1e6, "ms" },
209 		{ 1e3, "us" },
210 		{ 0, NULL },
211 	};
212 
213 	/* for CSV output */
214 	if (len == 0) {
215 		fprintf(lock_output, "%llu", nsec);
216 		return;
217 	}
218 
219 	for (int i = 0; table[i].unit; i++) {
220 		if (nsec < table[i].base)
221 			continue;
222 
223 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
224 		return;
225 	}
226 
227 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
228 }
229 
/*
 * Generate lock_stat_key_print_<member>(): prints the member as an
 * unsigned integer to lock_output, right-aligned to key->len columns.
 */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

/*
 * Generate lock_stat_key_print_<member>(): hands the member and key->len
 * to lock_stat_key_print_time() for formatting as a duration.
 */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

/* Printers referenced by the DEF_KEY_LOCK() tables in this file. */
PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
249 
250 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
251 					      struct lock_stat *ls)
252 {
253 	u64 wait_time = ls->wait_time_min;
254 
255 	if (wait_time == ULLONG_MAX)
256 		wait_time = 0;
257 
258 	lock_stat_key_print_time(wait_time, key->len);
259 }
260 
261 
/* Name of the key to sort by; matched against lock_key.name in select_key(). */
static const char		*sort_key = "acquired";

/* Comparator chosen by select_key() from the matching lock_key entry. */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

/* List of lock_key entries selected for output, linked through lock_key.list. */
static LIST_HEAD(lock_keys);
static const char		*output_fields;
271 
/*
 * Initializer for one struct lock_key entry: the user-visible name (stringified),
 * the header text, the field width, and the comparator/printer pair generated
 * for the struct lock_stat member named by fn_suffix.
 */
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
/* Keys used when select_key()/setup_output_field() are called with contention == false. */
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should be added here */
	{ }	/* terminator: a NULL name ends the table */
};
285 
/* Keys used when select_key()/setup_output_field() are called with contention == true. */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should be added here */
	{ }	/* terminator: a NULL name ends the table */
};
296 
297 static int select_key(bool contention)
298 {
299 	int i;
300 	struct lock_key *keys = report_keys;
301 
302 	if (contention)
303 		keys = contention_keys;
304 
305 	for (i = 0; keys[i].name; i++) {
306 		if (!strcmp(keys[i].name, sort_key)) {
307 			compare = keys[i].key;
308 
309 			/* selected key should be in the output fields */
310 			if (list_empty(&keys[i].list))
311 				list_add_tail(&keys[i].list, &lock_keys);
312 
313 			return 0;
314 		}
315 	}
316 
317 	pr_err("Unknown compare key: %s\n", sort_key);
318 	return -1;
319 }
320 
321 static int add_output_field(bool contention, char *name)
322 {
323 	int i;
324 	struct lock_key *keys = report_keys;
325 
326 	if (contention)
327 		keys = contention_keys;
328 
329 	for (i = 0; keys[i].name; i++) {
330 		if (strcmp(keys[i].name, name))
331 			continue;
332 
333 		/* prevent double link */
334 		if (list_empty(&keys[i].list))
335 			list_add_tail(&keys[i].list, &lock_keys);
336 
337 		return 0;
338 	}
339 
340 	pr_err("Unknown output field: %s\n", name);
341 	return -1;
342 }
343 
344 static int setup_output_field(bool contention, const char *str)
345 {
346 	char *tok, *tmp, *orig;
347 	int i, ret = 0;
348 	struct lock_key *keys = report_keys;
349 
350 	if (contention)
351 		keys = contention_keys;
352 
353 	/* no output field given: use all of them */
354 	if (str == NULL) {
355 		for (i = 0; keys[i].name; i++)
356 			list_add_tail(&keys[i].list, &lock_keys);
357 		return 0;
358 	}
359 
360 	for (i = 0; keys[i].name; i++)
361 		INIT_LIST_HEAD(&keys[i].list);
362 
363 	orig = tmp = strdup(str);
364 	if (orig == NULL)
365 		return -ENOMEM;
366 
367 	while ((tok = strsep(&tmp, ",")) != NULL){
368 		ret = add_output_field(contention, tok);
369 		if (ret < 0)
370 			break;
371 	}
372 	free(orig);
373 
374 	return ret;
375 }
376 
377 static void combine_lock_stats(struct lock_stat *st)
378 {
379 	struct rb_node **rb = &sorted.rb_node;
380 	struct rb_node *parent = NULL;
381 	struct lock_stat *p;
382 	int ret;
383 
384 	while (*rb) {
385 		p = container_of(*rb, struct lock_stat, rb);
386 		parent = *rb;
387 
388 		if (st->name && p->name)
389 			ret = strcmp(st->name, p->name);
390 		else
391 			ret = !!st->name - !!p->name;
392 
393 		if (ret == 0) {
394 			p->nr_acquired += st->nr_acquired;
395 			p->nr_contended += st->nr_contended;
396 			p->wait_time_total += st->wait_time_total;
397 
398 			if (p->nr_contended)
399 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
400 
401 			if (p->wait_time_min > st->wait_time_min)
402 				p->wait_time_min = st->wait_time_min;
403 			if (p->wait_time_max < st->wait_time_max)
404 				p->wait_time_max = st->wait_time_max;
405 
406 			p->broken |= st->broken;
407 			st->combined = 1;
408 			return;
409 		}
410 
411 		if (ret < 0)
412 			rb = &(*rb)->rb_left;
413 		else
414 			rb = &(*rb)->rb_right;
415 	}
416 
417 	rb_link_node(&st->rb, parent, rb);
418 	rb_insert_color(&st->rb, &sorted);
419 }
420 
421 static void insert_to_result(struct lock_stat *st,
422 			     int (*bigger)(struct lock_stat *, struct lock_stat *))
423 {
424 	struct rb_node **rb = &result.rb_node;
425 	struct rb_node *parent = NULL;
426 	struct lock_stat *p;
427 
428 	if (combine_locks && st->combined)
429 		return;
430 
431 	while (*rb) {
432 		p = container_of(*rb, struct lock_stat, rb);
433 		parent = *rb;
434 
435 		if (bigger(st, p))
436 			rb = &(*rb)->rb_left;
437 		else
438 			rb = &(*rb)->rb_right;
439 	}
440 
441 	rb_link_node(&st->rb, parent, rb);
442 	rb_insert_color(&st->rb, &result);
443 }
444 
445 /* returns left most element of result, and erase it */
446 static struct lock_stat *pop_from_result(void)
447 {
448 	struct rb_node *node = result.rb_node;
449 
450 	if (!node)
451 		return NULL;
452 
453 	while (node->rb_left)
454 		node = node->rb_left;
455 
456 	rb_erase(node, &result);
457 	return container_of(node, struct lock_stat, rb);
458 }
459 
/*
 * Set of per-event callbacks.  Each member may be NULL; the
 * evsel__process_*() wrappers in this file skip a NULL callback and
 * return 0.  Every callback returns 0 or a negative error code.
 */
struct trace_lock_handler {
	/* used with CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
485 
486 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
487 {
488 	struct lock_seq_stat *seq;
489 
490 	list_for_each_entry(seq, &ts->seq_list, list) {
491 		if (seq->addr == addr)
492 			return seq;
493 	}
494 
495 	seq = zalloc(sizeof(struct lock_seq_stat));
496 	if (!seq) {
497 		pr_err("memory allocation failed\n");
498 		return NULL;
499 	}
500 	seq->state = SEQ_STATE_UNINITIALIZED;
501 	seq->addr = addr;
502 
503 	list_add(&seq->list, &ts->seq_list);
504 	return seq;
505 }
506 
/* Index into bad_hist[]: which kind of event exposed a broken lock sequence. */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/*
 * Per-event-kind count of broken sequences; a handler bumps its slot the
 * first time it marks a given lock_stat as broken.
 */
static int bad_hist[BROKEN_MAX];

/* Bits tested in the "flags" field of the lock acquire event. */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
521 
522 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
523 {
524 	switch (aggr_mode) {
525 	case LOCK_AGGR_ADDR:
526 		*key = addr;
527 		break;
528 	case LOCK_AGGR_TASK:
529 		*key = tid;
530 		break;
531 	case LOCK_AGGR_CALLER:
532 	case LOCK_AGGR_CGROUP:
533 	default:
534 		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
535 		return -EINVAL;
536 	}
537 	return 0;
538 }
539 
540 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
541 
542 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
543 				 struct perf_sample *sample)
544 {
545 	if (aggr_mode == LOCK_AGGR_CALLER) {
546 		*key = callchain_id(evsel, sample);
547 		return 0;
548 	}
549 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
550 }
551 
552 static int report_lock_acquire_event(struct evsel *evsel,
553 				     struct perf_sample *sample)
554 {
555 	struct lock_stat *ls;
556 	struct thread_stat *ts;
557 	struct lock_seq_stat *seq;
558 	const char *name = evsel__strval(evsel, sample, "name");
559 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
560 	int flag = evsel__intval(evsel, sample, "flags");
561 	u64 key;
562 	int ret;
563 
564 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
565 	if (ret < 0)
566 		return ret;
567 
568 	ls = lock_stat_findnew(key, name, 0);
569 	if (!ls)
570 		return -ENOMEM;
571 
572 	ts = thread_stat_findnew(sample->tid);
573 	if (!ts)
574 		return -ENOMEM;
575 
576 	seq = get_seq(ts, addr);
577 	if (!seq)
578 		return -ENOMEM;
579 
580 	switch (seq->state) {
581 	case SEQ_STATE_UNINITIALIZED:
582 	case SEQ_STATE_RELEASED:
583 		if (!flag) {
584 			seq->state = SEQ_STATE_ACQUIRING;
585 		} else {
586 			if (flag & TRY_LOCK)
587 				ls->nr_trylock++;
588 			if (flag & READ_LOCK)
589 				ls->nr_readlock++;
590 			seq->state = SEQ_STATE_READ_ACQUIRED;
591 			seq->read_count = 1;
592 			ls->nr_acquired++;
593 		}
594 		break;
595 	case SEQ_STATE_READ_ACQUIRED:
596 		if (flag & READ_LOCK) {
597 			seq->read_count++;
598 			ls->nr_acquired++;
599 			goto end;
600 		} else {
601 			goto broken;
602 		}
603 		break;
604 	case SEQ_STATE_ACQUIRED:
605 	case SEQ_STATE_ACQUIRING:
606 	case SEQ_STATE_CONTENDED:
607 broken:
608 		/* broken lock sequence */
609 		if (!ls->broken) {
610 			ls->broken = 1;
611 			bad_hist[BROKEN_ACQUIRE]++;
612 		}
613 		list_del_init(&seq->list);
614 		free(seq);
615 		goto end;
616 	default:
617 		BUG_ON("Unknown state of lock sequence found!\n");
618 		break;
619 	}
620 
621 	ls->nr_acquire++;
622 	seq->prev_event_time = sample->time;
623 end:
624 	return 0;
625 }
626 
627 static int report_lock_acquired_event(struct evsel *evsel,
628 				      struct perf_sample *sample)
629 {
630 	struct lock_stat *ls;
631 	struct thread_stat *ts;
632 	struct lock_seq_stat *seq;
633 	u64 contended_term;
634 	const char *name = evsel__strval(evsel, sample, "name");
635 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
636 	u64 key;
637 	int ret;
638 
639 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
640 	if (ret < 0)
641 		return ret;
642 
643 	ls = lock_stat_findnew(key, name, 0);
644 	if (!ls)
645 		return -ENOMEM;
646 
647 	ts = thread_stat_findnew(sample->tid);
648 	if (!ts)
649 		return -ENOMEM;
650 
651 	seq = get_seq(ts, addr);
652 	if (!seq)
653 		return -ENOMEM;
654 
655 	switch (seq->state) {
656 	case SEQ_STATE_UNINITIALIZED:
657 		/* orphan event, do nothing */
658 		return 0;
659 	case SEQ_STATE_ACQUIRING:
660 		break;
661 	case SEQ_STATE_CONTENDED:
662 		contended_term = sample->time - seq->prev_event_time;
663 		ls->wait_time_total += contended_term;
664 		if (contended_term < ls->wait_time_min)
665 			ls->wait_time_min = contended_term;
666 		if (ls->wait_time_max < contended_term)
667 			ls->wait_time_max = contended_term;
668 		break;
669 	case SEQ_STATE_RELEASED:
670 	case SEQ_STATE_ACQUIRED:
671 	case SEQ_STATE_READ_ACQUIRED:
672 		/* broken lock sequence */
673 		if (!ls->broken) {
674 			ls->broken = 1;
675 			bad_hist[BROKEN_ACQUIRED]++;
676 		}
677 		list_del_init(&seq->list);
678 		free(seq);
679 		goto end;
680 	default:
681 		BUG_ON("Unknown state of lock sequence found!\n");
682 		break;
683 	}
684 
685 	seq->state = SEQ_STATE_ACQUIRED;
686 	ls->nr_acquired++;
687 	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
688 	seq->prev_event_time = sample->time;
689 end:
690 	return 0;
691 }
692 
693 static int report_lock_contended_event(struct evsel *evsel,
694 				       struct perf_sample *sample)
695 {
696 	struct lock_stat *ls;
697 	struct thread_stat *ts;
698 	struct lock_seq_stat *seq;
699 	const char *name = evsel__strval(evsel, sample, "name");
700 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
701 	u64 key;
702 	int ret;
703 
704 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
705 	if (ret < 0)
706 		return ret;
707 
708 	ls = lock_stat_findnew(key, name, 0);
709 	if (!ls)
710 		return -ENOMEM;
711 
712 	ts = thread_stat_findnew(sample->tid);
713 	if (!ts)
714 		return -ENOMEM;
715 
716 	seq = get_seq(ts, addr);
717 	if (!seq)
718 		return -ENOMEM;
719 
720 	switch (seq->state) {
721 	case SEQ_STATE_UNINITIALIZED:
722 		/* orphan event, do nothing */
723 		return 0;
724 	case SEQ_STATE_ACQUIRING:
725 		break;
726 	case SEQ_STATE_RELEASED:
727 	case SEQ_STATE_ACQUIRED:
728 	case SEQ_STATE_READ_ACQUIRED:
729 	case SEQ_STATE_CONTENDED:
730 		/* broken lock sequence */
731 		if (!ls->broken) {
732 			ls->broken = 1;
733 			bad_hist[BROKEN_CONTENDED]++;
734 		}
735 		list_del_init(&seq->list);
736 		free(seq);
737 		goto end;
738 	default:
739 		BUG_ON("Unknown state of lock sequence found!\n");
740 		break;
741 	}
742 
743 	seq->state = SEQ_STATE_CONTENDED;
744 	ls->nr_contended++;
745 	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
746 	seq->prev_event_time = sample->time;
747 end:
748 	return 0;
749 }
750 
751 static int report_lock_release_event(struct evsel *evsel,
752 				     struct perf_sample *sample)
753 {
754 	struct lock_stat *ls;
755 	struct thread_stat *ts;
756 	struct lock_seq_stat *seq;
757 	const char *name = evsel__strval(evsel, sample, "name");
758 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
759 	u64 key;
760 	int ret;
761 
762 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
763 	if (ret < 0)
764 		return ret;
765 
766 	ls = lock_stat_findnew(key, name, 0);
767 	if (!ls)
768 		return -ENOMEM;
769 
770 	ts = thread_stat_findnew(sample->tid);
771 	if (!ts)
772 		return -ENOMEM;
773 
774 	seq = get_seq(ts, addr);
775 	if (!seq)
776 		return -ENOMEM;
777 
778 	switch (seq->state) {
779 	case SEQ_STATE_UNINITIALIZED:
780 		goto end;
781 	case SEQ_STATE_ACQUIRED:
782 		break;
783 	case SEQ_STATE_READ_ACQUIRED:
784 		seq->read_count--;
785 		BUG_ON(seq->read_count < 0);
786 		if (seq->read_count) {
787 			ls->nr_release++;
788 			goto end;
789 		}
790 		break;
791 	case SEQ_STATE_ACQUIRING:
792 	case SEQ_STATE_CONTENDED:
793 	case SEQ_STATE_RELEASED:
794 		/* broken lock sequence */
795 		if (!ls->broken) {
796 			ls->broken = 1;
797 			bad_hist[BROKEN_RELEASE]++;
798 		}
799 		goto free_seq;
800 	default:
801 		BUG_ON("Unknown state of lock sequence found!\n");
802 		break;
803 	}
804 
805 	ls->nr_release++;
806 free_seq:
807 	list_del_init(&seq->list);
808 	free(seq);
809 end:
810 	return 0;
811 }
812 
813 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
814 				  char *buf, int size)
815 {
816 	u64 offset;
817 
818 	if (map == NULL || sym == NULL) {
819 		buf[0] = '\0';
820 		return 0;
821 	}
822 
823 	offset = map__map_ip(map, ip) - sym->start;
824 
825 	if (offset)
826 		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
827 	else
828 		return strlcpy(buf, sym->name, size);
829 }
830 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
831 				  char *buf, int size)
832 {
833 	struct thread *thread;
834 	struct callchain_cursor *cursor;
835 	struct machine *machine = &session->machines.host;
836 	struct symbol *sym;
837 	int skip = 0;
838 	int ret;
839 
840 	/* lock names will be replaced to task name later */
841 	if (show_thread_stats)
842 		return -1;
843 
844 	thread = machine__findnew_thread(machine, -1, sample->pid);
845 	if (thread == NULL)
846 		return -1;
847 
848 	cursor = get_tls_callchain_cursor();
849 
850 	/* use caller function name from the callchain */
851 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
852 					NULL, NULL, max_stack_depth);
853 	if (ret != 0) {
854 		thread__put(thread);
855 		return -1;
856 	}
857 
858 	callchain_cursor_commit(cursor);
859 	thread__put(thread);
860 
861 	while (true) {
862 		struct callchain_cursor_node *node;
863 
864 		node = callchain_cursor_current(cursor);
865 		if (node == NULL)
866 			break;
867 
868 		/* skip first few entries - for lock functions */
869 		if (++skip <= stack_skip)
870 			goto next;
871 
872 		sym = node->ms.sym;
873 		if (sym && !machine__is_lock_function(machine, node->ip)) {
874 			get_symbol_name_offset(node->ms.map, sym, node->ip,
875 					       buf, size);
876 			return 0;
877 		}
878 
879 next:
880 		callchain_cursor_advance(cursor);
881 	}
882 	return -1;
883 }
884 
885 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
886 {
887 	struct callchain_cursor *cursor;
888 	struct machine *machine = &session->machines.host;
889 	struct thread *thread;
890 	u64 hash = 0;
891 	int skip = 0;
892 	int ret;
893 
894 	thread = machine__findnew_thread(machine, -1, sample->pid);
895 	if (thread == NULL)
896 		return -1;
897 
898 	cursor = get_tls_callchain_cursor();
899 	/* use caller function name from the callchain */
900 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
901 					NULL, NULL, max_stack_depth);
902 	thread__put(thread);
903 
904 	if (ret != 0)
905 		return -1;
906 
907 	callchain_cursor_commit(cursor);
908 
909 	while (true) {
910 		struct callchain_cursor_node *node;
911 
912 		node = callchain_cursor_current(cursor);
913 		if (node == NULL)
914 			break;
915 
916 		/* skip first few entries - for lock functions */
917 		if (++skip <= stack_skip)
918 			goto next;
919 
920 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
921 			goto next;
922 
923 		hash ^= hash_long((unsigned long)node->ip, 64);
924 
925 next:
926 		callchain_cursor_advance(cursor);
927 	}
928 	return hash;
929 }
930 
931 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
932 {
933 	u64 *callstack;
934 	u64 i;
935 	int c;
936 
937 	callstack = calloc(max_stack, sizeof(*callstack));
938 	if (callstack == NULL)
939 		return NULL;
940 
941 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
942 		u64 ip = sample->callchain->ips[i];
943 
944 		if (ip >= PERF_CONTEXT_MAX)
945 			continue;
946 
947 		callstack[c++] = ip;
948 	}
949 	return callstack;
950 }
951 
/*
 * Handle a contention-begin event.
 *
 * Steps, in order:
 *  1. compute the lock_stat key for the current aggregation mode;
 *  2. on the first call only, load the kernel map and translate the symbol
 *     filters into entries of filters.addrs;
 *  3. find the lock_stat for the key, creating it (with a name derived from
 *     the aggregation mode) when it does not exist yet;
 *  4. apply the type, address and callstack filters - a sample that does
 *     not match returns 0 without touching any sequence state;
 *  5. advance the per-thread sequence for the lock to CONTENDED.
 *
 * Note that the lock_stat is created in step 3, before the filters run.
 *
 * Returns 0 on success or when the sample is filtered out, the error from
 * get_key_by_aggr_mode(), or -ENOMEM on allocation failure.
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	/* set once the one-time kernel map / symbol filter setup has run */
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			/* grow into a temporary so filters.addrs stays valid on failure */
			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		/* pick a display name for the new entry */
		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_CGROUP:
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	/* type filter: the event flags must equal one of the requested types */
	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* address filter: the lock address must equal one of the requested ones */
	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
			free(callstack);
			return 0;
		}

		/* the lock_stat keeps the first callstack seen; later ones are freed */
		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	/*
	 * NOTE(review): the CONTENDED case above already jumps to end, so this
	 * condition holds whenever the switch is left through a known state.
	 */
	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1115 
1116 static int report_lock_contention_end_event(struct evsel *evsel,
1117 					    struct perf_sample *sample)
1118 {
1119 	struct lock_stat *ls;
1120 	struct thread_stat *ts;
1121 	struct lock_seq_stat *seq;
1122 	u64 contended_term;
1123 	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1124 	u64 key;
1125 	int ret;
1126 
1127 	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1128 	if (ret < 0)
1129 		return ret;
1130 
1131 	ls = lock_stat_find(key);
1132 	if (!ls)
1133 		return 0;
1134 
1135 	ts = thread_stat_find(sample->tid);
1136 	if (!ts)
1137 		return 0;
1138 
1139 	seq = get_seq(ts, addr);
1140 	if (!seq)
1141 		return -ENOMEM;
1142 
1143 	switch (seq->state) {
1144 	case SEQ_STATE_UNINITIALIZED:
1145 		goto end;
1146 	case SEQ_STATE_CONTENDED:
1147 		contended_term = sample->time - seq->prev_event_time;
1148 		ls->wait_time_total += contended_term;
1149 		if (contended_term < ls->wait_time_min)
1150 			ls->wait_time_min = contended_term;
1151 		if (ls->wait_time_max < contended_term)
1152 			ls->wait_time_max = contended_term;
1153 		break;
1154 	case SEQ_STATE_ACQUIRING:
1155 	case SEQ_STATE_ACQUIRED:
1156 	case SEQ_STATE_READ_ACQUIRED:
1157 	case SEQ_STATE_RELEASED:
1158 		/* broken lock sequence */
1159 		if (!ls->broken) {
1160 			ls->broken = 1;
1161 			bad_hist[BROKEN_ACQUIRED]++;
1162 		}
1163 		list_del_init(&seq->list);
1164 		free(seq);
1165 		goto end;
1166 	default:
1167 		BUG_ON("Unknown state of lock sequence found!\n");
1168 		break;
1169 	}
1170 
1171 	seq->state = SEQ_STATE_ACQUIRED;
1172 	ls->nr_acquired++;
1173 	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1174 end:
1175 	return 0;
1176 }
1177 
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
/* Handler set that installs a callback for every event kind. */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

/*
 * Handler set that only installs the contention begin/end callbacks; the
 * remaining members are NULL, so those events are skipped by the
 * evsel__process_*() wrappers.
 */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


/* Handler set the evsel__process_*() wrappers dispatch through. */
static struct trace_lock_handler *trace_handler;
1196 
1197 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1198 {
1199 	if (trace_handler->acquire_event)
1200 		return trace_handler->acquire_event(evsel, sample);
1201 	return 0;
1202 }
1203 
1204 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1205 {
1206 	if (trace_handler->acquired_event)
1207 		return trace_handler->acquired_event(evsel, sample);
1208 	return 0;
1209 }
1210 
1211 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1212 {
1213 	if (trace_handler->contended_event)
1214 		return trace_handler->contended_event(evsel, sample);
1215 	return 0;
1216 }
1217 
1218 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1219 {
1220 	if (trace_handler->release_event)
1221 		return trace_handler->release_event(evsel, sample);
1222 	return 0;
1223 }
1224 
1225 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1226 {
1227 	if (trace_handler->contention_begin_event)
1228 		return trace_handler->contention_begin_event(evsel, sample);
1229 	return 0;
1230 }
1231 
1232 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1233 {
1234 	if (trace_handler->contention_end_event)
1235 		return trace_handler->contention_end_event(evsel, sample);
1236 	return 0;
1237 }
1238 
1239 static void print_bad_events(int bad, int total)
1240 {
1241 	/* Output for debug, this have to be removed */
1242 	int i;
1243 	int broken = 0;
1244 	const char *name[4] =
1245 		{ "acquire", "acquired", "contended", "release" };
1246 
1247 	for (i = 0; i < BROKEN_MAX; i++)
1248 		broken += bad_hist[i];
1249 
1250 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1251 		return;
1252 
1253 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1254 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1255 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1256 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1257 	for (i = 0; i < BROKEN_MAX; i++)
1258 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1259 }
1260 
1261 /* TODO: various way to print, coloring, nano or milli sec */
1262 static void print_result(void)
1263 {
1264 	struct lock_stat *st;
1265 	struct lock_key *key;
1266 	char cut_name[20];
1267 	int bad, total, printed;
1268 
1269 	if (!quiet) {
1270 		fprintf(lock_output, "%20s ", "Name");
1271 		list_for_each_entry(key, &lock_keys, list)
1272 			fprintf(lock_output, "%*s ", key->len, key->header);
1273 		fprintf(lock_output, "\n\n");
1274 	}
1275 
1276 	bad = total = printed = 0;
1277 	while ((st = pop_from_result())) {
1278 		total++;
1279 		if (st->broken)
1280 			bad++;
1281 		if (!st->nr_acquired)
1282 			continue;
1283 
1284 		bzero(cut_name, 20);
1285 
1286 		if (strlen(st->name) < 20) {
1287 			/* output raw name */
1288 			const char *name = st->name;
1289 
1290 			if (show_thread_stats) {
1291 				struct thread *t;
1292 
1293 				/* st->addr contains tid of thread */
1294 				t = perf_session__findnew(session, st->addr);
1295 				name = thread__comm_str(t);
1296 			}
1297 
1298 			fprintf(lock_output, "%20s ", name);
1299 		} else {
1300 			strncpy(cut_name, st->name, 16);
1301 			cut_name[16] = '.';
1302 			cut_name[17] = '.';
1303 			cut_name[18] = '.';
1304 			cut_name[19] = '\0';
1305 			/* cut off name for saving output style */
1306 			fprintf(lock_output, "%20s ", cut_name);
1307 		}
1308 
1309 		list_for_each_entry(key, &lock_keys, list) {
1310 			key->print(key, st);
1311 			fprintf(lock_output, " ");
1312 		}
1313 		fprintf(lock_output, "\n");
1314 
1315 		if (++printed >= print_nr_entries)
1316 			break;
1317 	}
1318 
1319 	print_bad_events(bad, total);
1320 }
1321 
/* Select what dump_info() prints: the thread list and/or the lock address map. */
static bool info_threads, info_map;
1323 
1324 static void dump_threads(void)
1325 {
1326 	struct thread_stat *st;
1327 	struct rb_node *node;
1328 	struct thread *t;
1329 
1330 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1331 
1332 	node = rb_first(&thread_stats);
1333 	while (node) {
1334 		st = container_of(node, struct thread_stat, rb);
1335 		t = perf_session__findnew(session, st->tid);
1336 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1337 		node = rb_next(node);
1338 		thread__put(t);
1339 	}
1340 }
1341 
1342 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1343 {
1344 	int ret;
1345 
1346 	if (a->name && b->name)
1347 		ret = strcmp(a->name, b->name);
1348 	else
1349 		ret = !!a->name - !!b->name;
1350 
1351 	if (!ret)
1352 		return a->addr < b->addr;
1353 	else
1354 		return ret < 0;
1355 }
1356 
1357 static void dump_map(void)
1358 {
1359 	unsigned int i;
1360 	struct lock_stat *st;
1361 
1362 	fprintf(lock_output, "Address of instance: name of class\n");
1363 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1364 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1365 			insert_to_result(st, compare_maps);
1366 		}
1367 	}
1368 
1369 	while ((st = pop_from_result()))
1370 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1371 }
1372 
1373 static void dump_info(void)
1374 {
1375 	if (info_threads)
1376 		dump_threads();
1377 
1378 	if (info_map) {
1379 		if (info_threads)
1380 			fputc('\n', lock_output);
1381 		dump_map();
1382 	}
1383 }
1384 
/*
 * Tracepoint name -> sample handler table for the lock_* events.  The
 * trailing comments name the kernel config options each tracepoint needs.
 */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

/* Tracepoint name -> sample handler table for the contention begin/end events. */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
1396 
1397 static int process_event_update(const struct perf_tool *tool,
1398 				union perf_event *event,
1399 				struct evlist **pevlist)
1400 {
1401 	int ret;
1402 
1403 	ret = perf_event__process_event_update(tool, event, pevlist);
1404 	if (ret < 0)
1405 		return ret;
1406 
1407 	/* this can return -EEXIST since we call it for each evsel */
1408 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1409 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1410 	return 0;
1411 }
1412 
1413 typedef int (*tracepoint_handler)(struct evsel *evsel,
1414 				  struct perf_sample *sample);
1415 
1416 static int process_sample_event(const struct perf_tool *tool __maybe_unused,
1417 				union perf_event *event,
1418 				struct perf_sample *sample,
1419 				struct evsel *evsel,
1420 				struct machine *machine)
1421 {
1422 	int err = 0;
1423 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1424 							sample->tid);
1425 
1426 	if (thread == NULL) {
1427 		pr_debug("problem processing %d event, skipping it.\n",
1428 			event->header.type);
1429 		return -1;
1430 	}
1431 
1432 	if (evsel->handler != NULL) {
1433 		tracepoint_handler f = evsel->handler;
1434 		err = f(evsel, sample);
1435 	}
1436 
1437 	thread__put(thread);
1438 
1439 	return err;
1440 }
1441 
1442 static void combine_result(void)
1443 {
1444 	unsigned int i;
1445 	struct lock_stat *st;
1446 
1447 	if (!combine_locks)
1448 		return;
1449 
1450 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1451 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1452 			combine_lock_stats(st);
1453 		}
1454 	}
1455 }
1456 
1457 static void sort_result(void)
1458 {
1459 	unsigned int i;
1460 	struct lock_stat *st;
1461 
1462 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1463 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1464 			insert_to_result(st, compare);
1465 		}
1466 	}
1467 }
1468 
/*
 * Lock type lookup table.
 *
 *  flags - LCB_F_* type bits identifying the lock type
 *  str   - string returned by get_type_str(); R/W variants are distinguished
 *  name  - string returned by get_type_name(); shared by the R/W variants
 *
 * get_type_str() and get_type_name() return the first entry whose flags
 * match, and get_type_flag() returns the first entry whose name (then str)
 * matches, so the order of entries matters.
 */
static const struct {
	unsigned int flags;
	const char *str;
	const char *name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for get_type_flag() */
	/* (never returned by get_type_str(): the entry above has the same flags) */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex-spin",	"mutex" },
};
1490 
1491 static const char *get_type_str(unsigned int flags)
1492 {
1493 	flags &= LCB_F_TYPE_MASK;
1494 
1495 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1496 		if (lock_type_table[i].flags == flags)
1497 			return lock_type_table[i].str;
1498 	}
1499 	return "unknown";
1500 }
1501 
1502 static const char *get_type_name(unsigned int flags)
1503 {
1504 	flags &= LCB_F_TYPE_MASK;
1505 
1506 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1507 		if (lock_type_table[i].flags == flags)
1508 			return lock_type_table[i].name;
1509 	}
1510 	return "unknown";
1511 }
1512 
1513 static unsigned int get_type_flag(const char *str)
1514 {
1515 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1516 		if (!strcmp(lock_type_table[i].name, str))
1517 			return lock_type_table[i].flags;
1518 	}
1519 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1520 		if (!strcmp(lock_type_table[i].str, str))
1521 			return lock_type_table[i].flags;
1522 	}
1523 	return UINT_MAX;
1524 }
1525 
1526 static void lock_filter_finish(void)
1527 {
1528 	zfree(&filters.types);
1529 	filters.nr_types = 0;
1530 
1531 	zfree(&filters.addrs);
1532 	filters.nr_addrs = 0;
1533 
1534 	for (int i = 0; i < filters.nr_syms; i++)
1535 		free(filters.syms[i]);
1536 
1537 	zfree(&filters.syms);
1538 	filters.nr_syms = 0;
1539 
1540 	zfree(&filters.cgrps);
1541 	filters.nr_cgrps = 0;
1542 
1543 	for (int i = 0; i < filters.nr_slabs; i++)
1544 		free(filters.slabs[i]);
1545 
1546 	zfree(&filters.slabs);
1547 	filters.nr_slabs = 0;
1548 }
1549 
/* Sort the contention results; currently this is just sort_result(). */
static void sort_contention_result(void)
{
	sort_result();
}
1554 
1555 static void print_header_stdio(void)
1556 {
1557 	struct lock_key *key;
1558 
1559 	list_for_each_entry(key, &lock_keys, list)
1560 		fprintf(lock_output, "%*s ", key->len, key->header);
1561 
1562 	switch (aggr_mode) {
1563 	case LOCK_AGGR_TASK:
1564 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1565 			show_lock_owner ? "owner" : "comm");
1566 		break;
1567 	case LOCK_AGGR_CALLER:
1568 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1569 		break;
1570 	case LOCK_AGGR_ADDR:
1571 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1572 		break;
1573 	case LOCK_AGGR_CGROUP:
1574 		fprintf(lock_output, "  %s\n\n", "cgroup");
1575 		break;
1576 	default:
1577 		break;
1578 	}
1579 }
1580 
1581 static void print_header_csv(const char *sep)
1582 {
1583 	struct lock_key *key;
1584 
1585 	fprintf(lock_output, "# output: ");
1586 	list_for_each_entry(key, &lock_keys, list)
1587 		fprintf(lock_output, "%s%s ", key->header, sep);
1588 
1589 	switch (aggr_mode) {
1590 	case LOCK_AGGR_TASK:
1591 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1592 			show_lock_owner ? "owner" : "comm");
1593 		break;
1594 	case LOCK_AGGR_CALLER:
1595 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1596 		if (verbose > 0)
1597 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1598 		fprintf(lock_output, "\n");
1599 		break;
1600 	case LOCK_AGGR_ADDR:
1601 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1602 		break;
1603 	case LOCK_AGGR_CGROUP:
1604 		fprintf(lock_output, "%s\n", "cgroup");
1605 		break;
1606 	default:
1607 		break;
1608 	}
1609 }
1610 
1611 static void print_header(void)
1612 {
1613 	if (!quiet) {
1614 		if (symbol_conf.field_sep)
1615 			print_header_csv(symbol_conf.field_sep);
1616 		else
1617 			print_header_stdio();
1618 	}
1619 }
1620 
1621 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1622 {
1623 	struct lock_key *key;
1624 	struct thread *t;
1625 	int pid;
1626 
1627 	list_for_each_entry(key, &lock_keys, list) {
1628 		key->print(key, st);
1629 		fprintf(lock_output, " ");
1630 	}
1631 
1632 	switch (aggr_mode) {
1633 	case LOCK_AGGR_CALLER:
1634 		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
1635 		break;
1636 	case LOCK_AGGR_TASK:
1637 		pid = st->addr;
1638 		t = perf_session__findnew(session, pid);
1639 		fprintf(lock_output, "  %10d   %s\n",
1640 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1641 		break;
1642 	case LOCK_AGGR_ADDR:
1643 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1644 			st->name, get_type_name(st->flags));
1645 		break;
1646 	case LOCK_AGGR_CGROUP:
1647 		fprintf(lock_output, "  %s\n", st->name);
1648 		break;
1649 	default:
1650 		break;
1651 	}
1652 
1653 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1654 		struct map *kmap;
1655 		struct symbol *sym;
1656 		char buf[128];
1657 		u64 ip;
1658 
1659 		for (int i = 0; i < max_stack_depth; i++) {
1660 			if (!st->callstack || !st->callstack[i])
1661 				break;
1662 
1663 			ip = st->callstack[i];
1664 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1665 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1666 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1667 		}
1668 	}
1669 }
1670 
1671 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1672 				const char *sep)
1673 {
1674 	struct lock_key *key;
1675 	struct thread *t;
1676 	int pid;
1677 
1678 	list_for_each_entry(key, &lock_keys, list) {
1679 		key->print(key, st);
1680 		fprintf(lock_output, "%s ", sep);
1681 	}
1682 
1683 	switch (aggr_mode) {
1684 	case LOCK_AGGR_CALLER:
1685 		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
1686 		if (verbose <= 0)
1687 			fprintf(lock_output, "\n");
1688 		break;
1689 	case LOCK_AGGR_TASK:
1690 		pid = st->addr;
1691 		t = perf_session__findnew(session, pid);
1692 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1693 			pid == -1 ? "Unknown" : thread__comm_str(t));
1694 		break;
1695 	case LOCK_AGGR_ADDR:
1696 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1697 			st->name, sep, get_type_name(st->flags));
1698 		break;
1699 	case LOCK_AGGR_CGROUP:
1700 		fprintf(lock_output, "%s\n",st->name);
1701 		break;
1702 	default:
1703 		break;
1704 	}
1705 
1706 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1707 		struct map *kmap;
1708 		struct symbol *sym;
1709 		char buf[128];
1710 		u64 ip;
1711 
1712 		for (int i = 0; i < max_stack_depth; i++) {
1713 			if (!st->callstack || !st->callstack[i])
1714 				break;
1715 
1716 			ip = st->callstack[i];
1717 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1718 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1719 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1720 		}
1721 		fprintf(lock_output, "\n");
1722 	}
1723 }
1724 
1725 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1726 {
1727 	if (symbol_conf.field_sep)
1728 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1729 	else
1730 		print_lock_stat_stdio(con, st);
1731 }
1732 
1733 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1734 {
1735 	/* Output for debug, this have to be removed */
1736 	int broken = fails->task + fails->stack + fails->time + fails->data;
1737 
1738 	if (!use_bpf)
1739 		print_bad_events(bad, total);
1740 
1741 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1742 		return;
1743 
1744 	total += broken;
1745 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1746 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1747 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1748 
1749 	fprintf(lock_output, "histogram of failure reasons\n");
1750 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1751 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1752 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1753 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1754 }
1755 
1756 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1757 			     const char *sep)
1758 {
1759 	/* Output for debug, this have to be removed */
1760 	if (use_bpf)
1761 		bad = fails->task + fails->stack + fails->time + fails->data;
1762 
1763 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1764 		return;
1765 
1766 	total += bad;
1767 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1768 
1769 	if (use_bpf) {
1770 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1771 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1772 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1773 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1774 	} else {
1775 		int i;
1776 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1777 
1778 		for (i = 0; i < BROKEN_MAX; i++)
1779 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1780 	}
1781 	fprintf(lock_output, "\n");
1782 }
1783 
1784 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1785 {
1786 	if (symbol_conf.field_sep)
1787 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1788 	else
1789 		print_footer_stdio(total, bad, fails);
1790 }
1791 
1792 static void print_contention_result(struct lock_contention *con)
1793 {
1794 	struct lock_stat *st;
1795 	int bad, total, printed;
1796 
1797 	if (!quiet)
1798 		print_header();
1799 
1800 	bad = total = printed = 0;
1801 
1802 	while ((st = pop_from_result())) {
1803 		total += use_bpf ? st->nr_contended : 1;
1804 		if (st->broken)
1805 			bad++;
1806 
1807 		if (!st->wait_time_total)
1808 			continue;
1809 
1810 		print_lock_stat(con, st);
1811 
1812 		if (++printed >= print_nr_entries)
1813 			break;
1814 	}
1815 
1816 	if (print_nr_entries) {
1817 		/* update the total/bad stats */
1818 		while ((st = pop_from_result())) {
1819 			total += use_bpf ? st->nr_contended : 1;
1820 			if (st->broken)
1821 				bad++;
1822 		}
1823 	}
1824 	/* some entries are collected but hidden by the callstack filter */
1825 	total += con->nr_filtered;
1826 
1827 	print_footer(total, bad, &con->fails);
1828 }
1829 
/* Copied into perf_data.force when __cmd_report() and __cmd_contention() open the input. */
static bool force;
1831 
1832 static int __cmd_report(bool display_info)
1833 {
1834 	int err = -EINVAL;
1835 	struct perf_tool eops;
1836 	struct perf_data data = {
1837 		.path  = input_name,
1838 		.mode  = PERF_DATA_MODE_READ,
1839 		.force = force,
1840 	};
1841 
1842 	perf_tool__init(&eops, /*ordered_events=*/true);
1843 	eops.attr		 = perf_event__process_attr;
1844 	eops.event_update	 = process_event_update;
1845 	eops.sample		 = process_sample_event;
1846 	eops.comm		 = perf_event__process_comm;
1847 	eops.mmap		 = perf_event__process_mmap;
1848 	eops.namespaces		 = perf_event__process_namespaces;
1849 	eops.tracing_data	 = perf_event__process_tracing_data;
1850 	session = perf_session__new(&data, &eops);
1851 	if (IS_ERR(session)) {
1852 		pr_err("Initializing perf session failed\n");
1853 		return PTR_ERR(session);
1854 	}
1855 
1856 	symbol_conf.allow_aliases = true;
1857 	symbol__init(&session->header.env);
1858 
1859 	if (!data.is_pipe) {
1860 		if (!perf_session__has_traces(session, "lock record"))
1861 			goto out_delete;
1862 
1863 		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1864 			pr_err("Initializing perf session tracepoint handlers failed\n");
1865 			goto out_delete;
1866 		}
1867 
1868 		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1869 			pr_err("Initializing perf session tracepoint handlers failed\n");
1870 			goto out_delete;
1871 		}
1872 	}
1873 
1874 	if (setup_output_field(false, output_fields))
1875 		goto out_delete;
1876 
1877 	if (select_key(false))
1878 		goto out_delete;
1879 
1880 	if (show_thread_stats)
1881 		aggr_mode = LOCK_AGGR_TASK;
1882 
1883 	err = perf_session__process_events(session);
1884 	if (err)
1885 		goto out_delete;
1886 
1887 	setup_pager();
1888 	if (display_info) /* used for info subcommand */
1889 		dump_info();
1890 	else {
1891 		combine_result();
1892 		sort_result();
1893 		print_result();
1894 	}
1895 
1896 out_delete:
1897 	perf_session__delete(session);
1898 	return err;
1899 }
1900 
1901 static void sighandler(int sig __maybe_unused)
1902 {
1903 }
1904 
1905 static int check_lock_contention_options(const struct option *options,
1906 					 const char * const *usage)
1907 
1908 {
1909 	if (show_thread_stats && show_lock_addrs) {
1910 		pr_err("Cannot use thread and addr mode together\n");
1911 		parse_options_usage(usage, options, "threads", 0);
1912 		parse_options_usage(NULL, options, "lock-addr", 0);
1913 		return -1;
1914 	}
1915 
1916 	if (show_lock_owner && !use_bpf) {
1917 		pr_err("Lock owners are available only with BPF\n");
1918 		parse_options_usage(usage, options, "lock-owner", 0);
1919 		parse_options_usage(NULL, options, "use-bpf", 0);
1920 		return -1;
1921 	}
1922 
1923 	if (show_lock_owner && show_lock_addrs) {
1924 		pr_err("Cannot use owner and addr mode together\n");
1925 		parse_options_usage(usage, options, "lock-owner", 0);
1926 		parse_options_usage(NULL, options, "lock-addr", 0);
1927 		return -1;
1928 	}
1929 
1930 	if (show_lock_cgroups && !use_bpf) {
1931 		pr_err("Cgroups are available only with BPF\n");
1932 		parse_options_usage(usage, options, "lock-cgroup", 0);
1933 		parse_options_usage(NULL, options, "use-bpf", 0);
1934 		return -1;
1935 	}
1936 
1937 	if (show_lock_cgroups && show_lock_addrs) {
1938 		pr_err("Cannot use cgroup and addr mode together\n");
1939 		parse_options_usage(usage, options, "lock-cgroup", 0);
1940 		parse_options_usage(NULL, options, "lock-addr", 0);
1941 		return -1;
1942 	}
1943 
1944 	if (show_lock_cgroups && show_thread_stats) {
1945 		pr_err("Cannot use cgroup and thread mode together\n");
1946 		parse_options_usage(usage, options, "lock-cgroup", 0);
1947 		parse_options_usage(NULL, options, "threads", 0);
1948 		return -1;
1949 	}
1950 
1951 	if (symbol_conf.field_sep) {
1952 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1953 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1954 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1955 			pr_err("Cannot use the separator that is already used\n");
1956 			parse_options_usage(usage, options, "x", 1);
1957 			return -1;
1958 		}
1959 	}
1960 
1961 	if (show_lock_owner)
1962 		show_thread_stats = true;
1963 
1964 	return 0;
1965 }
1966 
1967 static int __cmd_contention(int argc, const char **argv)
1968 {
1969 	int err = -EINVAL;
1970 	struct perf_tool eops;
1971 	struct perf_data data = {
1972 		.path  = input_name,
1973 		.mode  = PERF_DATA_MODE_READ,
1974 		.force = force,
1975 	};
1976 	struct lock_contention con = {
1977 		.target = &target,
1978 		.map_nr_entries = bpf_map_entries,
1979 		.max_stack = max_stack_depth,
1980 		.stack_skip = stack_skip,
1981 		.filters = &filters,
1982 		.save_callstack = needs_callstack(),
1983 		.owner = show_lock_owner,
1984 		.cgroups = RB_ROOT,
1985 	};
1986 
1987 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
1988 	if (!lockhash_table)
1989 		return -ENOMEM;
1990 
1991 	con.result = &lockhash_table[0];
1992 
1993 	perf_tool__init(&eops, /*ordered_events=*/true);
1994 	eops.attr		 = perf_event__process_attr;
1995 	eops.event_update	 = process_event_update;
1996 	eops.sample		 = process_sample_event;
1997 	eops.comm		 = perf_event__process_comm;
1998 	eops.mmap		 = perf_event__process_mmap;
1999 	eops.tracing_data	 = perf_event__process_tracing_data;
2000 
2001 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
2002 	if (IS_ERR(session)) {
2003 		pr_err("Initializing perf session failed\n");
2004 		err = PTR_ERR(session);
2005 		session = NULL;
2006 		goto out_delete;
2007 	}
2008 
2009 	con.machine = &session->machines.host;
2010 
2011 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2012 		show_lock_addrs ? LOCK_AGGR_ADDR :
2013 		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2014 
2015 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2016 		con.save_callstack = true;
2017 
2018 	symbol_conf.allow_aliases = true;
2019 	symbol__init(&session->header.env);
2020 
2021 	if (use_bpf) {
2022 		err = target__validate(&target);
2023 		if (err) {
2024 			char errbuf[512];
2025 
2026 			target__strerror(&target, err, errbuf, 512);
2027 			pr_err("%s\n", errbuf);
2028 			goto out_delete;
2029 		}
2030 
2031 		signal(SIGINT, sighandler);
2032 		signal(SIGCHLD, sighandler);
2033 		signal(SIGTERM, sighandler);
2034 
2035 		con.evlist = evlist__new();
2036 		if (con.evlist == NULL) {
2037 			err = -ENOMEM;
2038 			goto out_delete;
2039 		}
2040 
2041 		err = evlist__create_maps(con.evlist, &target);
2042 		if (err < 0)
2043 			goto out_delete;
2044 
2045 		if (argc) {
2046 			err = evlist__prepare_workload(con.evlist, &target,
2047 						       argv, false, NULL);
2048 			if (err < 0)
2049 				goto out_delete;
2050 		}
2051 
2052 		if (lock_contention_prepare(&con) < 0) {
2053 			pr_err("lock contention BPF setup failed\n");
2054 			goto out_delete;
2055 		}
2056 	} else if (!data.is_pipe) {
2057 		if (!perf_session__has_traces(session, "lock record"))
2058 			goto out_delete;
2059 
2060 		if (!evlist__find_evsel_by_str(session->evlist,
2061 					       "lock:contention_begin")) {
2062 			pr_err("lock contention evsel not found\n");
2063 			goto out_delete;
2064 		}
2065 
2066 		if (perf_session__set_tracepoints_handlers(session,
2067 						contention_tracepoints)) {
2068 			pr_err("Initializing perf session tracepoint handlers failed\n");
2069 			goto out_delete;
2070 		}
2071 	}
2072 
2073 	if (setup_output_field(true, output_fields))
2074 		goto out_delete;
2075 
2076 	if (select_key(true))
2077 		goto out_delete;
2078 
2079 	if (symbol_conf.field_sep) {
2080 		int i;
2081 		struct lock_key *keys = contention_keys;
2082 
2083 		/* do not align output in CSV format */
2084 		for (i = 0; keys[i].name; i++)
2085 			keys[i].len = 0;
2086 	}
2087 
2088 	if (use_bpf) {
2089 		lock_contention_start();
2090 		if (argc)
2091 			evlist__start_workload(con.evlist);
2092 
2093 		/* wait for signal */
2094 		pause();
2095 
2096 		lock_contention_stop();
2097 		lock_contention_read(&con);
2098 	} else {
2099 		err = perf_session__process_events(session);
2100 		if (err)
2101 			goto out_delete;
2102 	}
2103 
2104 	setup_pager();
2105 
2106 	sort_contention_result();
2107 	print_contention_result(&con);
2108 
2109 out_delete:
2110 	lock_filter_finish();
2111 	evlist__delete(con.evlist);
2112 	lock_contention_finish(&con);
2113 	perf_session__delete(session);
2114 	zfree(&lockhash_table);
2115 	return err;
2116 }
2117 
2118 
2119 static int __cmd_record(int argc, const char **argv)
2120 {
2121 	const char *record_args[] = {
2122 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2123 	};
2124 	const char *callgraph_args[] = {
2125 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2126 	};
2127 	unsigned int rec_argc, i, j, ret;
2128 	unsigned int nr_tracepoints;
2129 	unsigned int nr_callgraph_args = 0;
2130 	const char **rec_argv;
2131 	bool has_lock_stat = true;
2132 
2133 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2134 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2135 			pr_debug("tracepoint %s is not enabled. "
2136 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2137 				 lock_tracepoints[i].name);
2138 			has_lock_stat = false;
2139 			break;
2140 		}
2141 	}
2142 
2143 	if (has_lock_stat)
2144 		goto setup_args;
2145 
2146 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2147 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2148 			pr_err("tracepoint %s is not enabled.\n",
2149 			       contention_tracepoints[i].name);
2150 			return 1;
2151 		}
2152 	}
2153 
2154 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2155 
2156 setup_args:
2157 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2158 
2159 	if (has_lock_stat)
2160 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2161 	else
2162 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2163 
2164 	/* factor of 2 is for -e in front of each tracepoint */
2165 	rec_argc += 2 * nr_tracepoints;
2166 
2167 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2168 	if (!rec_argv)
2169 		return -ENOMEM;
2170 
2171 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2172 		rec_argv[i] = record_args[i];
2173 
2174 	for (j = 0; j < nr_tracepoints; j++) {
2175 		rec_argv[i++] = "-e";
2176 		rec_argv[i++] = has_lock_stat
2177 			? lock_tracepoints[j].name
2178 			: contention_tracepoints[j].name;
2179 	}
2180 
2181 	for (j = 0; j < nr_callgraph_args; j++, i++)
2182 		rec_argv[i] = callgraph_args[j];
2183 
2184 	for (j = 1; j < (unsigned int)argc; j++, i++)
2185 		rec_argv[i] = argv[j];
2186 
2187 	BUG_ON(i != rec_argc);
2188 
2189 	ret = cmd_record(i, rec_argv);
2190 	free(rec_argv);
2191 	return ret;
2192 }
2193 
2194 static int parse_map_entry(const struct option *opt, const char *str,
2195 			    int unset __maybe_unused)
2196 {
2197 	unsigned long *len = (unsigned long *)opt->value;
2198 	unsigned long val;
2199 	char *endptr;
2200 
2201 	errno = 0;
2202 	val = strtoul(str, &endptr, 0);
2203 	if (*endptr != '\0' || errno != 0) {
2204 		pr_err("invalid BPF map length: %s\n", str);
2205 		return -1;
2206 	}
2207 
2208 	*len = val;
2209 	return 0;
2210 }
2211 
2212 static int parse_max_stack(const struct option *opt, const char *str,
2213 			   int unset __maybe_unused)
2214 {
2215 	unsigned long *len = (unsigned long *)opt->value;
2216 	long val;
2217 	char *endptr;
2218 
2219 	errno = 0;
2220 	val = strtol(str, &endptr, 0);
2221 	if (*endptr != '\0' || errno != 0) {
2222 		pr_err("invalid max stack depth: %s\n", str);
2223 		return -1;
2224 	}
2225 
2226 	if (val < 0 || val > sysctl__max_stack()) {
2227 		pr_err("invalid max stack depth: %ld\n", val);
2228 		return -1;
2229 	}
2230 
2231 	*len = val;
2232 	return 0;
2233 }
2234 
2235 static bool add_lock_type(unsigned int flags)
2236 {
2237 	unsigned int *tmp;
2238 
2239 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2240 	if (tmp == NULL)
2241 		return false;
2242 
2243 	tmp[filters.nr_types++] = flags;
2244 	filters.types = tmp;
2245 	return true;
2246 }
2247 
2248 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2249 			   int unset __maybe_unused)
2250 {
2251 	char *s, *tmp, *tok;
2252 	int ret = 0;
2253 
2254 	s = strdup(str);
2255 	if (s == NULL)
2256 		return -1;
2257 
2258 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2259 		unsigned int flags = get_type_flag(tok);
2260 
2261 		if (flags == -1U) {
2262 			pr_err("Unknown lock flags: %s\n", tok);
2263 			ret = -1;
2264 			break;
2265 		}
2266 
2267 		if (!add_lock_type(flags)) {
2268 			ret = -1;
2269 			break;
2270 		}
2271 	}
2272 
2273 	free(s);
2274 	return ret;
2275 }
2276 
2277 static bool add_lock_addr(unsigned long addr)
2278 {
2279 	unsigned long *tmp;
2280 
2281 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2282 	if (tmp == NULL) {
2283 		pr_err("Memory allocation failure\n");
2284 		return false;
2285 	}
2286 
2287 	tmp[filters.nr_addrs++] = addr;
2288 	filters.addrs = tmp;
2289 	return true;
2290 }
2291 
2292 static bool add_lock_sym(char *name)
2293 {
2294 	char **tmp;
2295 	char *sym = strdup(name);
2296 
2297 	if (sym == NULL) {
2298 		pr_err("Memory allocation failure\n");
2299 		return false;
2300 	}
2301 
2302 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2303 	if (tmp == NULL) {
2304 		pr_err("Memory allocation failure\n");
2305 		free(sym);
2306 		return false;
2307 	}
2308 
2309 	tmp[filters.nr_syms++] = sym;
2310 	filters.syms = tmp;
2311 	return true;
2312 }
2313 
2314 static bool add_lock_slab(char *name)
2315 {
2316 	char **tmp;
2317 	char *sym = strdup(name);
2318 
2319 	if (sym == NULL) {
2320 		pr_err("Memory allocation failure\n");
2321 		return false;
2322 	}
2323 
2324 	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
2325 	if (tmp == NULL) {
2326 		pr_err("Memory allocation failure\n");
2327 		return false;
2328 	}
2329 
2330 	tmp[filters.nr_slabs++] = sym;
2331 	filters.slabs = tmp;
2332 	return true;
2333 }
2334 
2335 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2336 			   int unset __maybe_unused)
2337 {
2338 	char *s, *tmp, *tok;
2339 	int ret = 0;
2340 	u64 addr;
2341 
2342 	s = strdup(str);
2343 	if (s == NULL)
2344 		return -1;
2345 
2346 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2347 		char *end;
2348 
2349 		addr = strtoul(tok, &end, 16);
2350 		if (*end == '\0') {
2351 			if (!add_lock_addr(addr)) {
2352 				ret = -1;
2353 				break;
2354 			}
2355 			continue;
2356 		}
2357 
2358 		if (*tok == '&') {
2359 			if (!add_lock_slab(tok + 1)) {
2360 				ret = -1;
2361 				break;
2362 			}
2363 			continue;
2364 		}
2365 
2366 		/*
2367 		 * At this moment, we don't have kernel symbols.  Save the symbols
2368 		 * in a separate list and resolve them to addresses later.
2369 		 */
2370 		if (!add_lock_sym(tok)) {
2371 			ret = -1;
2372 			break;
2373 		}
2374 	}
2375 
2376 	free(s);
2377 	return ret;
2378 }
2379 
2380 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2381 			int unset __maybe_unused)
2382 {
2383 	const char **name = (const char **)opt->value;
2384 
2385 	if (str == NULL)
2386 		return -1;
2387 
2388 	lock_output = fopen(str, "w");
2389 	if (lock_output == NULL) {
2390 		pr_err("Cannot open %s\n", str);
2391 		return -1;
2392 	}
2393 
2394 	*name = str;
2395 	return 0;
2396 }
2397 
2398 static bool add_lock_cgroup(char *name)
2399 {
2400 	u64 *tmp;
2401 	struct cgroup *cgrp;
2402 
2403 	cgrp = cgroup__new(name, /*do_open=*/false);
2404 	if (cgrp == NULL) {
2405 		pr_err("Failed to create cgroup: %s\n", name);
2406 		return false;
2407 	}
2408 
2409 	if (read_cgroup_id(cgrp) < 0) {
2410 		pr_err("Failed to read cgroup id for %s\n", name);
2411 		cgroup__put(cgrp);
2412 		return false;
2413 	}
2414 
2415 	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
2416 	if (tmp == NULL) {
2417 		pr_err("Memory allocation failure\n");
2418 		return false;
2419 	}
2420 
2421 	tmp[filters.nr_cgrps++] = cgrp->id;
2422 	filters.cgrps = tmp;
2423 	cgroup__put(cgrp);
2424 	return true;
2425 }
2426 
2427 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
2428 			       int unset __maybe_unused)
2429 {
2430 	char *s, *tmp, *tok;
2431 	int ret = 0;
2432 
2433 	s = strdup(str);
2434 	if (s == NULL)
2435 		return -1;
2436 
2437 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2438 		if (!add_lock_cgroup(tok)) {
2439 			ret = -1;
2440 			break;
2441 		}
2442 	}
2443 
2444 	free(s);
2445 	return ret;
2446 }
2447 
2448 int cmd_lock(int argc, const char **argv)
2449 {
2450 	const struct option lock_options[] = {
2451 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2452 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2453 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2454 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2455 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2456 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2457 		   "file", "vmlinux pathname"),
2458 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2459 		   "file", "kallsyms pathname"),
2460 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2461 	OPT_END()
2462 	};
2463 
2464 	const struct option info_options[] = {
2465 	OPT_BOOLEAN('t', "threads", &info_threads,
2466 		    "dump the thread list in perf.data"),
2467 	OPT_BOOLEAN('m', "map", &info_map,
2468 		    "dump the map of lock instances (address:name table)"),
2469 	OPT_PARENT(lock_options)
2470 	};
2471 
2472 	const struct option report_options[] = {
2473 	OPT_STRING('k', "key", &sort_key, "acquired",
2474 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2475 	OPT_STRING('F', "field", &output_fields, NULL,
2476 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2477 	/* TODO: type */
2478 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2479 		    "combine locks in the same class"),
2480 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2481 		    "show per-thread lock stats"),
2482 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2483 	OPT_PARENT(lock_options)
2484 	};
2485 
2486 	struct option contention_options[] = {
2487 	OPT_STRING('k', "key", &sort_key, "wait_total",
2488 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2489 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2490 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2491 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2492 		    "show per-thread lock stats"),
2493 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2494 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2495 		    "System-wide collection from all CPUs"),
2496 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2497 		    "List of cpus to monitor"),
2498 	OPT_STRING('p', "pid", &target.pid, "pid",
2499 		   "Trace on existing process id"),
2500 	OPT_STRING(0, "tid", &target.tid, "tid",
2501 		   "Trace on existing thread id (exclusive to --pid)"),
2502 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2503 		     "Max number of BPF map entries", parse_map_entry),
2504 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2505 		     "Set the maximum stack depth when collecting lock contention, "
2506 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2507 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2508 		    "Set the number of stack depth to skip when finding a lock caller, "
2509 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2510 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2511 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2512 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2513 		     "Filter specific type of locks", parse_lock_type),
2514 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2515 		     "Filter specific address/symbol of locks", parse_lock_addr),
2516 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2517 		     "Filter specific function in the callstack", parse_call_stack),
2518 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2519 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2520 		   "print result in CSV format with custom separator"),
2521 	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
2522 	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
2523 		     "Filter specific cgroups", parse_cgroup_filter),
2524 	OPT_PARENT(lock_options)
2525 	};
2526 
2527 	const char * const info_usage[] = {
2528 		"perf lock info [<options>]",
2529 		NULL
2530 	};
2531 	const char *const lock_subcommands[] = { "record", "report", "script",
2532 						 "info", "contention", NULL };
2533 	const char *lock_usage[] = {
2534 		NULL,
2535 		NULL
2536 	};
2537 	const char * const report_usage[] = {
2538 		"perf lock report [<options>]",
2539 		NULL
2540 	};
2541 	const char * const contention_usage[] = {
2542 		"perf lock contention [<options>]",
2543 		NULL
2544 	};
2545 	unsigned int i;
2546 	int rc = 0;
2547 
2548 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2549 	if (!lockhash_table)
2550 		return -ENOMEM;
2551 
2552 	for (i = 0; i < LOCKHASH_SIZE; i++)
2553 		INIT_HLIST_HEAD(lockhash_table + i);
2554 
2555 	lock_output = stderr;
2556 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2557 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2558 	if (!argc)
2559 		usage_with_options(lock_usage, lock_options);
2560 
2561 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2562 		return __cmd_record(argc, argv);
2563 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2564 		trace_handler = &report_lock_ops;
2565 		if (argc) {
2566 			argc = parse_options(argc, argv,
2567 					     report_options, report_usage, 0);
2568 			if (argc)
2569 				usage_with_options(report_usage, report_options);
2570 		}
2571 		rc = __cmd_report(false);
2572 	} else if (!strcmp(argv[0], "script")) {
2573 		/* Aliased to 'perf script' */
2574 		rc = cmd_script(argc, argv);
2575 	} else if (!strcmp(argv[0], "info")) {
2576 		if (argc) {
2577 			argc = parse_options(argc, argv,
2578 					     info_options, info_usage, 0);
2579 			if (argc)
2580 				usage_with_options(info_usage, info_options);
2581 		}
2582 
2583 		/* If neither threads nor map requested, display both */
2584 		if (!info_threads && !info_map) {
2585 			info_threads = true;
2586 			info_map = true;
2587 		}
2588 
2589 		/* recycling report_lock_ops */
2590 		trace_handler = &report_lock_ops;
2591 		rc = __cmd_report(true);
2592 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2593 		trace_handler = &contention_lock_ops;
2594 		sort_key = "wait_total";
2595 		output_fields = "contended,wait_total,wait_max,avg_wait";
2596 
2597 #ifndef HAVE_BPF_SKEL
2598 		set_option_nobuild(contention_options, 'b', "use-bpf",
2599 				   "no BUILD_BPF_SKEL=1", false);
2600 #endif
2601 		if (argc) {
2602 			argc = parse_options(argc, argv, contention_options,
2603 					     contention_usage, 0);
2604 		}
2605 
2606 		if (check_lock_contention_options(contention_options,
2607 						  contention_usage) < 0)
2608 			return -1;
2609 
2610 		rc = __cmd_contention(argc, argv);
2611 	} else {
2612 		usage_with_options(lock_usage, lock_options);
2613 	}
2614 
2615 	/* free usage string allocated by parse_options_subcommand */
2616 	free((void *)lock_usage[0]);
2617 
2618 	zfree(&lockhash_table);
2619 	return rc;
2620 }
2621