xref: /linux/tools/perf/builtin-lock.c (revision 7685b334d1e4927cc73b62c65293ba65748d9c52)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/cgroup.h"
14 #include "util/callchain.h"
15 #include "util/lock-contention.h"
16 #include "util/bpf_skel/lock_data.h"
17 
18 #include <subcmd/pager.h>
19 #include <subcmd/parse-options.h>
20 #include "util/trace-event.h"
21 #include "util/tracepoint.h"
22 
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/data.h"
27 #include "util/string2.h"
28 #include "util/map.h"
29 #include "util/util.h"
30 
31 #include <stdio.h>
32 #include <sys/types.h>
33 #include <sys/prctl.h>
34 #include <semaphore.h>
35 #include <math.h>
36 #include <limits.h>
37 #include <ctype.h>
38 
39 #include <linux/list.h>
40 #include <linux/hash.h>
41 #include <linux/kernel.h>
42 #include <linux/zalloc.h>
43 #include <linux/err.h>
44 #include <linux/stringify.h>
45 
/* perf session being analysed; its host machine is used to resolve threads and symbols */
static struct perf_session *session;
static struct target target;

/* rb-tree of struct thread_stat nodes, ordered by tid */
static struct rb_root		thread_stats;

/* when set, insert_to_result() skips stats already folded by combine_lock_stats() */
static bool combine_locks;
/* when set, lock_contention_caller() declines to resolve a caller name */
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
/* upper bound on callchain entries resolved or copied per sample */
static int max_stack_depth = CONTENTION_STACK_DEPTH;
/* number of leading callchain entries skipped before looking for a caller */
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
/* stream the printing helpers in this file write to */
static FILE *lock_output;

static struct lock_filter filters;

/* selects the key that lock stats are aggregated by (address, task, caller, cgroup) */
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
67 
68 static struct thread_stat *thread_stat_find(u32 tid)
69 {
70 	struct rb_node *node;
71 	struct thread_stat *st;
72 
73 	node = thread_stats.rb_node;
74 	while (node) {
75 		st = container_of(node, struct thread_stat, rb);
76 		if (st->tid == tid)
77 			return st;
78 		else if (tid < st->tid)
79 			node = node->rb_left;
80 		else
81 			node = node->rb_right;
82 	}
83 
84 	return NULL;
85 }
86 
87 static void thread_stat_insert(struct thread_stat *new)
88 {
89 	struct rb_node **rb = &thread_stats.rb_node;
90 	struct rb_node *parent = NULL;
91 	struct thread_stat *p;
92 
93 	while (*rb) {
94 		p = container_of(*rb, struct thread_stat, rb);
95 		parent = *rb;
96 
97 		if (new->tid < p->tid)
98 			rb = &(*rb)->rb_left;
99 		else if (new->tid > p->tid)
100 			rb = &(*rb)->rb_right;
101 		else
102 			BUG_ON("inserting invalid thread_stat\n");
103 	}
104 
105 	rb_link_node(&new->rb, parent, rb);
106 	rb_insert_color(&new->rb, &thread_stats);
107 }
108 
109 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
110 {
111 	struct thread_stat *st;
112 
113 	st = thread_stat_find(tid);
114 	if (st)
115 		return st;
116 
117 	st = zalloc(sizeof(struct thread_stat));
118 	if (!st) {
119 		pr_err("memory allocation failed\n");
120 		return NULL;
121 	}
122 
123 	st->tid = tid;
124 	INIT_LIST_HEAD(&st->seq_list);
125 
126 	thread_stat_insert(st);
127 
128 	return st;
129 }
130 
131 static struct thread_stat *thread_stat_findnew_first(u32 tid);
132 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
133 	thread_stat_findnew_first;
134 
135 static struct thread_stat *thread_stat_findnew_first(u32 tid)
136 {
137 	struct thread_stat *st;
138 
139 	st = zalloc(sizeof(struct thread_stat));
140 	if (!st) {
141 		pr_err("memory allocation failed\n");
142 		return NULL;
143 	}
144 	st->tid = tid;
145 	INIT_LIST_HEAD(&st->seq_list);
146 
147 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
148 	rb_insert_color(&st->rb, &thread_stats);
149 
150 	thread_stat_findnew = thread_stat_findnew_after_first;
151 	return st;
152 }
153 
/*
 * Build a simple sort-key function for one lock_stat member.  The generated
 * lock_stat_key_<member>() returns non-zero when @one's value of that member
 * is bigger than @two's, and zero otherwise.
 */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

/* comparison functions referenced by the key tables via DEF_KEY_LOCK() */
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
167 
168 static int lock_stat_key_wait_time_min(struct lock_stat *one,
169 					struct lock_stat *two)
170 {
171 	u64 s1 = one->wait_time_min;
172 	u64 s2 = two->wait_time_min;
173 	if (s1 == ULLONG_MAX)
174 		s1 = 0;
175 	if (s2 == ULLONG_MAX)
176 		s2 = 0;
177 	return s1 > s2;
178 }
179 
/*
 * One sortable/printable column of the lock report.  Entries live in the
 * report_keys[] / contention_keys[] tables and are chained onto the
 * lock_keys list when they are selected for output.
 */
struct lock_key {
	/*
	 * name: the value the user specifies to select this key.
	 * It should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to the function that compares two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to the function that prints this field of a lock stat */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry used to link this key into lock_keys */
	struct list_head	list;
};
198 
199 static void lock_stat_key_print_time(unsigned long long nsec, int len)
200 {
201 	static const struct {
202 		float base;
203 		const char *unit;
204 	} table[] = {
205 		{ 1e9 * 3600, "h " },
206 		{ 1e9 * 60, "m " },
207 		{ 1e9, "s " },
208 		{ 1e6, "ms" },
209 		{ 1e3, "us" },
210 		{ 0, NULL },
211 	};
212 
213 	/* for CSV output */
214 	if (len == 0) {
215 		fprintf(lock_output, "%llu", nsec);
216 		return;
217 	}
218 
219 	for (int i = 0; table[i].unit; i++) {
220 		if (nsec < table[i].base)
221 			continue;
222 
223 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
224 		return;
225 	}
226 
227 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
228 }
229 
/*
 * Generate lock_stat_key_print_<member>(), which prints the member as an
 * unsigned integer right-aligned in key->len columns on lock_output.
 */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

/*
 * Generate lock_stat_key_print_<member>(), which hands the member to
 * lock_stat_key_print_time() together with key->len as the field width.
 */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

/* print functions referenced by the key tables via DEF_KEY_LOCK() */
PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
249 
250 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
251 					      struct lock_stat *ls)
252 {
253 	u64 wait_time = ls->wait_time_min;
254 
255 	if (wait_time == ULLONG_MAX)
256 		wait_time = 0;
257 
258 	lock_stat_key_print_time(wait_time, key->len);
259 }
260 
261 
/* name of the key used for sorting; matched against lock_key.name by select_key() */
static const char		*sort_key = "acquired";

/* comparison function of the selected sort key, set by select_key() */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

/* list of the lock_key entries selected as output fields, in output order */
static LIST_HEAD(lock_keys);
static const char		*output_fields;
271 
/*
 * Build one lock_key initializer: the user-visible name (stringified), the
 * header text, the field width, and the compare/print functions derived
 * from fn_suffix.  The list member is left empty.
 */
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
/* key table used when select_key()/setup_output_field() get contention == false */
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should be added here */
	{ }	/* terminator: users of the table stop at the entry with a NULL name */
};
285 
/* key table used when select_key()/setup_output_field() get contention == true */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should be added here */
	{ }	/* terminator: users of the table stop at the entry with a NULL name */
};
296 
297 static int select_key(bool contention)
298 {
299 	int i;
300 	struct lock_key *keys = report_keys;
301 
302 	if (contention)
303 		keys = contention_keys;
304 
305 	for (i = 0; keys[i].name; i++) {
306 		if (!strcmp(keys[i].name, sort_key)) {
307 			compare = keys[i].key;
308 
309 			/* selected key should be in the output fields */
310 			if (list_empty(&keys[i].list))
311 				list_add_tail(&keys[i].list, &lock_keys);
312 
313 			return 0;
314 		}
315 	}
316 
317 	pr_err("Unknown compare key: %s\n", sort_key);
318 	return -1;
319 }
320 
321 static int add_output_field(bool contention, char *name)
322 {
323 	int i;
324 	struct lock_key *keys = report_keys;
325 
326 	if (contention)
327 		keys = contention_keys;
328 
329 	for (i = 0; keys[i].name; i++) {
330 		if (strcmp(keys[i].name, name))
331 			continue;
332 
333 		/* prevent double link */
334 		if (list_empty(&keys[i].list))
335 			list_add_tail(&keys[i].list, &lock_keys);
336 
337 		return 0;
338 	}
339 
340 	pr_err("Unknown output field: %s\n", name);
341 	return -1;
342 }
343 
344 static int setup_output_field(bool contention, const char *str)
345 {
346 	char *tok, *tmp, *orig;
347 	int i, ret = 0;
348 	struct lock_key *keys = report_keys;
349 
350 	if (contention)
351 		keys = contention_keys;
352 
353 	/* no output field given: use all of them */
354 	if (str == NULL) {
355 		for (i = 0; keys[i].name; i++)
356 			list_add_tail(&keys[i].list, &lock_keys);
357 		return 0;
358 	}
359 
360 	for (i = 0; keys[i].name; i++)
361 		INIT_LIST_HEAD(&keys[i].list);
362 
363 	orig = tmp = strdup(str);
364 	if (orig == NULL)
365 		return -ENOMEM;
366 
367 	while ((tok = strsep(&tmp, ",")) != NULL){
368 		ret = add_output_field(contention, tok);
369 		if (ret < 0)
370 			break;
371 	}
372 	free(orig);
373 
374 	return ret;
375 }
376 
377 static void combine_lock_stats(struct lock_stat *st)
378 {
379 	struct rb_node **rb = &sorted.rb_node;
380 	struct rb_node *parent = NULL;
381 	struct lock_stat *p;
382 	int ret;
383 
384 	while (*rb) {
385 		p = container_of(*rb, struct lock_stat, rb);
386 		parent = *rb;
387 
388 		if (st->name && p->name)
389 			ret = strcmp(st->name, p->name);
390 		else
391 			ret = !!st->name - !!p->name;
392 
393 		if (ret == 0) {
394 			p->nr_acquired += st->nr_acquired;
395 			p->nr_contended += st->nr_contended;
396 			p->wait_time_total += st->wait_time_total;
397 
398 			if (p->nr_contended)
399 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
400 
401 			if (p->wait_time_min > st->wait_time_min)
402 				p->wait_time_min = st->wait_time_min;
403 			if (p->wait_time_max < st->wait_time_max)
404 				p->wait_time_max = st->wait_time_max;
405 
406 			p->broken |= st->broken;
407 			st->combined = 1;
408 			return;
409 		}
410 
411 		if (ret < 0)
412 			rb = &(*rb)->rb_left;
413 		else
414 			rb = &(*rb)->rb_right;
415 	}
416 
417 	rb_link_node(&st->rb, parent, rb);
418 	rb_insert_color(&st->rb, &sorted);
419 }
420 
421 static void insert_to_result(struct lock_stat *st,
422 			     int (*bigger)(struct lock_stat *, struct lock_stat *))
423 {
424 	struct rb_node **rb = &result.rb_node;
425 	struct rb_node *parent = NULL;
426 	struct lock_stat *p;
427 
428 	if (combine_locks && st->combined)
429 		return;
430 
431 	while (*rb) {
432 		p = container_of(*rb, struct lock_stat, rb);
433 		parent = *rb;
434 
435 		if (bigger(st, p))
436 			rb = &(*rb)->rb_left;
437 		else
438 			rb = &(*rb)->rb_right;
439 	}
440 
441 	rb_link_node(&st->rb, parent, rb);
442 	rb_insert_color(&st->rb, &result);
443 }
444 
445 /* returns left most element of result, and erase it */
446 static struct lock_stat *pop_from_result(void)
447 {
448 	struct rb_node *node = result.rb_node;
449 
450 	if (!node)
451 		return NULL;
452 
453 	while (node->rb_left)
454 		node = node->rb_left;
455 
456 	rb_erase(node, &result);
457 	return container_of(node, struct lock_stat, rb);
458 }
459 
/*
 * Set of per-event callbacks.  The evsel__process_*() wrappers call the
 * matching member of the active handler and treat a NULL member as
 * "nothing to do".  Each callback returns 0 on success or a negative
 * error code.
 */
struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
485 
486 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
487 {
488 	struct lock_seq_stat *seq;
489 
490 	list_for_each_entry(seq, &ts->seq_list, list) {
491 		if (seq->addr == addr)
492 			return seq;
493 	}
494 
495 	seq = zalloc(sizeof(struct lock_seq_stat));
496 	if (!seq) {
497 		pr_err("memory allocation failed\n");
498 		return NULL;
499 	}
500 	seq->state = SEQ_STATE_UNINITIALIZED;
501 	seq->addr = addr;
502 
503 	list_add(&seq->list, &ts->seq_list);
504 	return seq;
505 }
506 
/*
 * Index into bad_hist[]: the kind of event whose handler detected a broken
 * lock sequence.  BROKEN_MAX is the number of kinds.
 */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/*
 * Per-kind counters, bumped when a handler marks a lock_stat as broken for
 * the first time; printed by print_bad_events().
 */
static int bad_hist[BROKEN_MAX];

/* bits tested in the "flags" field of a lock acquire event */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
521 
522 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
523 {
524 	switch (aggr_mode) {
525 	case LOCK_AGGR_ADDR:
526 		*key = addr;
527 		break;
528 	case LOCK_AGGR_TASK:
529 		*key = tid;
530 		break;
531 	case LOCK_AGGR_CALLER:
532 	case LOCK_AGGR_CGROUP:
533 	default:
534 		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
535 		return -EINVAL;
536 	}
537 	return 0;
538 }
539 
540 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
541 
542 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
543 				 struct perf_sample *sample)
544 {
545 	if (aggr_mode == LOCK_AGGR_CALLER) {
546 		*key = callchain_id(evsel, sample);
547 		return 0;
548 	}
549 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
550 }
551 
/*
 * Handle a lock acquire event.
 *
 * Reads the "name", "lockdep_addr" and "flags" fields of the sample, finds
 * or creates the lock_stat (keyed per aggregation mode), the thread_stat of
 * the sampled tid and that thread's sequence state for the lock address,
 * then advances the sequence state machine.
 *
 * Returns 0 on success (including when a broken sequence is detected and
 * dropped), -ENOMEM on allocation failure, or the error from
 * get_key_by_aggr_mode_simple().
 */
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			/* plain acquire: wait for the acquired/contended event */
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			/* any flag set: counted as acquired right away */
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			/* nested read acquire: skips nr_acquire and the timestamp update */
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		/* forget the sequence; a later event starts from a fresh one */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
626 
/*
 * Handle a lock acquired event.
 *
 * Reads the "name" and "lockdep_addr" fields of the sample and looks up (or
 * creates) the lock_stat, thread_stat and sequence state like the acquire
 * handler.  When the sequence was contended, the time since the previous
 * event is accounted as wait time.
 *
 * Returns 0 on success (including orphan events and dropped broken
 * sequences), -ENOMEM on allocation failure, or the error from
 * get_key_by_aggr_mode_simple().
 */
static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		/* acquired without contention: no wait time to account */
		break;
	case SEQ_STATE_CONTENDED:
		/* prev_event_time was set by the contended event */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	/* average is per contended acquisition; stays 0 until a contention was seen */
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
692 
/*
 * Handle a lock contended event.
 *
 * Reads the "name" and "lockdep_addr" fields of the sample and looks up (or
 * creates) the lock_stat, thread_stat and sequence state.  Only a sequence
 * in SEQ_STATE_ACQUIRING may become contended; the event time is stored so
 * the acquired handler can compute the wait time.
 *
 * Returns 0 on success (including orphan events and dropped broken
 * sequences), -ENOMEM on allocation failure, or the error from
 * get_key_by_aggr_mode_simple().
 */
static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	/* nr_contended was just incremented, so the divisor is non-zero */
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
750 
/*
 * Handle a lock release event.
 *
 * Reads the "name" and "lockdep_addr" fields of the sample and looks up (or
 * creates) the lock_stat, thread_stat and sequence state.  A release ends
 * the sequence: the sequence entry is unlinked and freed, except when it is
 * still uninitialized or when nested read acquisitions remain.
 *
 * Returns 0 on success (including dropped broken sequences), -ENOMEM on
 * allocation failure, or the error from get_key_by_aggr_mode_simple().
 */
static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* release without a recorded acquire: keep the entry, count nothing */
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			/* other read acquisitions are still held: keep the sequence */
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		/* drop the sequence without counting a release */
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}
812 
813 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
814 				  char *buf, int size)
815 {
816 	u64 offset;
817 
818 	if (map == NULL || sym == NULL) {
819 		buf[0] = '\0';
820 		return 0;
821 	}
822 
823 	offset = map__map_ip(map, ip) - sym->start;
824 
825 	if (offset)
826 		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
827 	else
828 		return strlcpy(buf, sym->name, size);
829 }
830 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
831 				  char *buf, int size)
832 {
833 	struct thread *thread;
834 	struct callchain_cursor *cursor;
835 	struct machine *machine = &session->machines.host;
836 	struct symbol *sym;
837 	int skip = 0;
838 	int ret;
839 
840 	/* lock names will be replaced to task name later */
841 	if (show_thread_stats)
842 		return -1;
843 
844 	thread = machine__findnew_thread(machine, -1, sample->pid);
845 	if (thread == NULL)
846 		return -1;
847 
848 	cursor = get_tls_callchain_cursor();
849 
850 	/* use caller function name from the callchain */
851 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
852 					NULL, NULL, max_stack_depth);
853 	if (ret != 0) {
854 		thread__put(thread);
855 		return -1;
856 	}
857 
858 	callchain_cursor_commit(cursor);
859 	thread__put(thread);
860 
861 	while (true) {
862 		struct callchain_cursor_node *node;
863 
864 		node = callchain_cursor_current(cursor);
865 		if (node == NULL)
866 			break;
867 
868 		/* skip first few entries - for lock functions */
869 		if (++skip <= stack_skip)
870 			goto next;
871 
872 		sym = node->ms.sym;
873 		if (sym && !machine__is_lock_function(machine, node->ip)) {
874 			get_symbol_name_offset(node->ms.map, sym, node->ip,
875 					       buf, size);
876 			return 0;
877 		}
878 
879 next:
880 		callchain_cursor_advance(cursor);
881 	}
882 	return -1;
883 }
884 
885 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
886 {
887 	struct callchain_cursor *cursor;
888 	struct machine *machine = &session->machines.host;
889 	struct thread *thread;
890 	u64 hash = 0;
891 	int skip = 0;
892 	int ret;
893 
894 	thread = machine__findnew_thread(machine, -1, sample->pid);
895 	if (thread == NULL)
896 		return -1;
897 
898 	cursor = get_tls_callchain_cursor();
899 	/* use caller function name from the callchain */
900 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
901 					NULL, NULL, max_stack_depth);
902 	thread__put(thread);
903 
904 	if (ret != 0)
905 		return -1;
906 
907 	callchain_cursor_commit(cursor);
908 
909 	while (true) {
910 		struct callchain_cursor_node *node;
911 
912 		node = callchain_cursor_current(cursor);
913 		if (node == NULL)
914 			break;
915 
916 		/* skip first few entries - for lock functions */
917 		if (++skip <= stack_skip)
918 			goto next;
919 
920 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
921 			goto next;
922 
923 		hash ^= hash_long((unsigned long)node->ip, 64);
924 
925 next:
926 		callchain_cursor_advance(cursor);
927 	}
928 	return hash;
929 }
930 
931 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
932 {
933 	u64 *callstack;
934 	u64 i;
935 	int c;
936 
937 	callstack = calloc(max_stack, sizeof(*callstack));
938 	if (callstack == NULL)
939 		return NULL;
940 
941 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
942 		u64 ip = sample->callchain->ips[i];
943 
944 		if (ip >= PERF_CONTEXT_MAX)
945 			continue;
946 
947 		callstack[c++] = ip;
948 	}
949 	return callstack;
950 }
951 
/*
 * Handle a contention begin event.
 *
 * Reads the "lock_addr" and "flags" fields of the sample, computes the
 * aggregation key, and finds or creates the lock_stat for it (naming a new
 * one after the kernel symbol at the key or after the calling function,
 * depending on the aggregation mode).  Events that do not pass the type,
 * address or callstack filters are ignored.  Otherwise the thread's
 * sequence for the lock address is moved to SEQ_STATE_CONTENDED and the
 * event time is recorded for the matching end event.
 *
 * On the first call the kernel map is loaded and the symbol names in
 * filters.syms are converted to addresses appended to filters.addrs.
 *
 * Returns 0 on success (including filtered events and dropped broken
 * sequences), -ENOMEM on allocation failure, or the error from
 * get_key_by_aggr_mode().
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	/* set once the one-time kernel map / symbol filter setup below has run */
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			/* grow via a temporary so filters.addrs stays valid on failure */
			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		/* first event for this key: pick a name for the new lock_stat */
		char buf[128];
		const char *name = "";

		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_CGROUP:
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	/* type filter: the event flags must equal one of the requested types */
	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* address filter: the lock address must be one of the requested ones */
	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
			free(callstack);
			return 0;
		}

		/* the lock_stat keeps the first callstack seen; later ones are discarded */
		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	/* the CONTENDED case jumped to end above, so this test holds here */
	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1115 
/*
 * Handle a contention end event.
 *
 * Reads the "lock_addr" field of the sample and computes the aggregation
 * key.  Unlike the begin handler it only looks up the lock_stat and the
 * thread_stat: when either does not exist the event is ignored.  For a
 * sequence in SEQ_STATE_CONTENDED the time since the begin event is added
 * to the wait statistics and the sequence becomes SEQ_STATE_ACQUIRED.
 *
 * Returns 0 on success (including ignored events and dropped broken
 * sequences), -ENOMEM if get_seq() cannot allocate, or the error from
 * get_key_by_aggr_mode().
 */
static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	/* get_seq() creates an UNINITIALIZED entry if this address is new to the thread */
	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		/* prev_event_time was set by the contention begin event */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			/* count each lock_stat at most once in the histogram */
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	/* nr_acquired was just incremented, so the divisor is non-zero */
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}
1177 
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
/* handler set with a callback for every lock event kind */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

/*
 * Handler set for the contention begin/end events only; the remaining
 * callbacks stay NULL, so the evsel__process_*() wrappers skip them.
 */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


/* handler set the evsel__process_*() wrappers dispatch to */
static struct trace_lock_handler *trace_handler;
1196 
1197 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1198 {
1199 	if (trace_handler->acquire_event)
1200 		return trace_handler->acquire_event(evsel, sample);
1201 	return 0;
1202 }
1203 
1204 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1205 {
1206 	if (trace_handler->acquired_event)
1207 		return trace_handler->acquired_event(evsel, sample);
1208 	return 0;
1209 }
1210 
1211 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1212 {
1213 	if (trace_handler->contended_event)
1214 		return trace_handler->contended_event(evsel, sample);
1215 	return 0;
1216 }
1217 
1218 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1219 {
1220 	if (trace_handler->release_event)
1221 		return trace_handler->release_event(evsel, sample);
1222 	return 0;
1223 }
1224 
1225 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1226 {
1227 	if (trace_handler->contention_begin_event)
1228 		return trace_handler->contention_begin_event(evsel, sample);
1229 	return 0;
1230 }
1231 
1232 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1233 {
1234 	if (trace_handler->contention_end_event)
1235 		return trace_handler->contention_end_event(evsel, sample);
1236 	return 0;
1237 }
1238 
1239 static void print_bad_events(int bad, int total)
1240 {
1241 	/* Output for debug, this have to be removed */
1242 	int i;
1243 	int broken = 0;
1244 	const char *name[4] =
1245 		{ "acquire", "acquired", "contended", "release" };
1246 
1247 	for (i = 0; i < BROKEN_MAX; i++)
1248 		broken += bad_hist[i];
1249 
1250 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1251 		return;
1252 
1253 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1254 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1255 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1256 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1257 	for (i = 0; i < BROKEN_MAX; i++)
1258 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1259 }
1260 
1261 /* TODO: various way to print, coloring, nano or milli sec */
1262 static void print_result(void)
1263 {
1264 	struct lock_stat *st;
1265 	struct lock_key *key;
1266 	char cut_name[20];
1267 	int bad, total, printed;
1268 
1269 	if (!quiet) {
1270 		fprintf(lock_output, "%20s ", "Name");
1271 		list_for_each_entry(key, &lock_keys, list)
1272 			fprintf(lock_output, "%*s ", key->len, key->header);
1273 		fprintf(lock_output, "\n\n");
1274 	}
1275 
1276 	bad = total = printed = 0;
1277 	while ((st = pop_from_result())) {
1278 		total++;
1279 		if (st->broken)
1280 			bad++;
1281 		if (!st->nr_acquired)
1282 			continue;
1283 
1284 		bzero(cut_name, 20);
1285 
1286 		if (strlen(st->name) < 20) {
1287 			/* output raw name */
1288 			const char *name = st->name;
1289 
1290 			if (show_thread_stats) {
1291 				struct thread *t;
1292 
1293 				/* st->addr contains tid of thread */
1294 				t = perf_session__findnew(session, st->addr);
1295 				name = thread__comm_str(t);
1296 			}
1297 
1298 			fprintf(lock_output, "%20s ", name);
1299 		} else {
1300 			strncpy(cut_name, st->name, 16);
1301 			cut_name[16] = '.';
1302 			cut_name[17] = '.';
1303 			cut_name[18] = '.';
1304 			cut_name[19] = '\0';
1305 			/* cut off name for saving output style */
1306 			fprintf(lock_output, "%20s ", cut_name);
1307 		}
1308 
1309 		list_for_each_entry(key, &lock_keys, list) {
1310 			key->print(key, st);
1311 			fprintf(lock_output, " ");
1312 		}
1313 		fprintf(lock_output, "\n");
1314 
1315 		if (++printed >= print_nr_entries)
1316 			break;
1317 	}
1318 
1319 	print_bad_events(bad, total);
1320 }
1321 
/* Select what dump_info() prints: the thread list and/or the lock address map. */
static bool info_threads, info_map;
1323 
1324 static void dump_threads(void)
1325 {
1326 	struct thread_stat *st;
1327 	struct rb_node *node;
1328 	struct thread *t;
1329 
1330 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1331 
1332 	node = rb_first(&thread_stats);
1333 	while (node) {
1334 		st = container_of(node, struct thread_stat, rb);
1335 		t = perf_session__findnew(session, st->tid);
1336 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1337 		node = rb_next(node);
1338 		thread__put(t);
1339 	}
1340 }
1341 
1342 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1343 {
1344 	int ret;
1345 
1346 	if (a->name && b->name)
1347 		ret = strcmp(a->name, b->name);
1348 	else
1349 		ret = !!a->name - !!b->name;
1350 
1351 	if (!ret)
1352 		return a->addr < b->addr;
1353 	else
1354 		return ret < 0;
1355 }
1356 
1357 static void dump_map(void)
1358 {
1359 	unsigned int i;
1360 	struct lock_stat *st;
1361 
1362 	fprintf(lock_output, "Address of instance: name of class\n");
1363 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1364 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1365 			insert_to_result(st, compare_maps);
1366 		}
1367 	}
1368 
1369 	while ((st = pop_from_result()))
1370 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1371 }
1372 
1373 static void dump_info(void)
1374 {
1375 	if (info_threads)
1376 		dump_threads();
1377 
1378 	if (info_map) {
1379 		if (info_threads)
1380 			fputc('\n', lock_output);
1381 		dump_map();
1382 	}
1383 }
1384 
/*
 * Tracepoint name -> sample dispatcher mapping for the four lock:lock_*
 * events.  The trailing comments name the kernel config options each
 * tracepoint depends on.
 */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};
1391 
/* Tracepoint name -> sample dispatcher mapping for the lock contention begin/end events. */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
1396 
1397 static int process_event_update(const struct perf_tool *tool,
1398 				union perf_event *event,
1399 				struct evlist **pevlist)
1400 {
1401 	int ret;
1402 
1403 	ret = perf_event__process_event_update(tool, event, pevlist);
1404 	if (ret < 0)
1405 		return ret;
1406 
1407 	/* this can return -EEXIST since we call it for each evsel */
1408 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1409 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1410 	return 0;
1411 }
1412 
/* Signature of the per-evsel sample callbacks that process_sample_event() invokes through evsel->handler. */
typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);
1415 
1416 static int process_sample_event(const struct perf_tool *tool __maybe_unused,
1417 				union perf_event *event,
1418 				struct perf_sample *sample,
1419 				struct evsel *evsel,
1420 				struct machine *machine)
1421 {
1422 	int err = 0;
1423 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1424 							sample->tid);
1425 
1426 	if (thread == NULL) {
1427 		pr_debug("problem processing %d event, skipping it.\n",
1428 			event->header.type);
1429 		return -1;
1430 	}
1431 
1432 	if (evsel->handler != NULL) {
1433 		tracepoint_handler f = evsel->handler;
1434 		err = f(evsel, sample);
1435 	}
1436 
1437 	thread__put(thread);
1438 
1439 	return err;
1440 }
1441 
1442 static void combine_result(void)
1443 {
1444 	unsigned int i;
1445 	struct lock_stat *st;
1446 
1447 	if (!combine_locks)
1448 		return;
1449 
1450 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1451 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1452 			combine_lock_stats(st);
1453 		}
1454 	}
1455 }
1456 
1457 static void sort_result(void)
1458 {
1459 	unsigned int i;
1460 	struct lock_stat *st;
1461 
1462 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1463 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1464 			insert_to_result(st, compare);
1465 		}
1466 	}
1467 }
1468 
/*
 * Mapping between lock type flags and their printable names.
 *
 * Lookups by flags return the first matching row, so when the same flags
 * value appears more than once only the first row is used for printing;
 * later rows with the same flags are reachable only by name.
 */
static const struct {
	unsigned int flags;
	/*
	 * Name of the lock flags (access), with delimiter ':'.
	 * For example, rwsem:R or rwsem:W.
	 */
	const char *flags_name;
	/* Name of the lock (type), for example, rwlock or rwsem. */
	const char *lock_name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
};
1495 
1496 static const char *get_type_flags_name(unsigned int flags)
1497 {
1498 	flags &= LCB_F_TYPE_MASK;
1499 
1500 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1501 		if (lock_type_table[i].flags == flags)
1502 			return lock_type_table[i].flags_name;
1503 	}
1504 	return "unknown";
1505 }
1506 
1507 static const char *get_type_lock_name(unsigned int flags)
1508 {
1509 	flags &= LCB_F_TYPE_MASK;
1510 
1511 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1512 		if (lock_type_table[i].flags == flags)
1513 			return lock_type_table[i].lock_name;
1514 	}
1515 	return "unknown";
1516 }
1517 
1518 static void lock_filter_finish(void)
1519 {
1520 	zfree(&filters.types);
1521 	filters.nr_types = 0;
1522 
1523 	zfree(&filters.addrs);
1524 	filters.nr_addrs = 0;
1525 
1526 	for (int i = 0; i < filters.nr_syms; i++)
1527 		free(filters.syms[i]);
1528 
1529 	zfree(&filters.syms);
1530 	filters.nr_syms = 0;
1531 
1532 	zfree(&filters.cgrps);
1533 	filters.nr_cgrps = 0;
1534 
1535 	for (int i = 0; i < filters.nr_slabs; i++)
1536 		free(filters.slabs[i]);
1537 
1538 	zfree(&filters.slabs);
1539 	filters.nr_slabs = 0;
1540 }
1541 
/* Sort the contention results; currently a plain wrapper around sort_result(). */
static void sort_contention_result(void)
{
	sort_result();
}
1546 
1547 static void print_header_stdio(void)
1548 {
1549 	struct lock_key *key;
1550 
1551 	list_for_each_entry(key, &lock_keys, list)
1552 		fprintf(lock_output, "%*s ", key->len, key->header);
1553 
1554 	switch (aggr_mode) {
1555 	case LOCK_AGGR_TASK:
1556 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1557 			show_lock_owner ? "owner" : "comm");
1558 		break;
1559 	case LOCK_AGGR_CALLER:
1560 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1561 		break;
1562 	case LOCK_AGGR_ADDR:
1563 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1564 		break;
1565 	case LOCK_AGGR_CGROUP:
1566 		fprintf(lock_output, "  %s\n\n", "cgroup");
1567 		break;
1568 	default:
1569 		break;
1570 	}
1571 }
1572 
1573 static void print_header_csv(const char *sep)
1574 {
1575 	struct lock_key *key;
1576 
1577 	fprintf(lock_output, "# output: ");
1578 	list_for_each_entry(key, &lock_keys, list)
1579 		fprintf(lock_output, "%s%s ", key->header, sep);
1580 
1581 	switch (aggr_mode) {
1582 	case LOCK_AGGR_TASK:
1583 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1584 			show_lock_owner ? "owner" : "comm");
1585 		break;
1586 	case LOCK_AGGR_CALLER:
1587 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1588 		if (verbose > 0)
1589 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1590 		fprintf(lock_output, "\n");
1591 		break;
1592 	case LOCK_AGGR_ADDR:
1593 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1594 		break;
1595 	case LOCK_AGGR_CGROUP:
1596 		fprintf(lock_output, "%s\n", "cgroup");
1597 		break;
1598 	default:
1599 		break;
1600 	}
1601 }
1602 
1603 static void print_header(void)
1604 {
1605 	if (!quiet) {
1606 		if (symbol_conf.field_sep)
1607 			print_header_csv(symbol_conf.field_sep);
1608 		else
1609 			print_header_stdio();
1610 	}
1611 }
1612 
1613 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1614 {
1615 	struct lock_key *key;
1616 	struct thread *t;
1617 	int pid;
1618 
1619 	list_for_each_entry(key, &lock_keys, list) {
1620 		key->print(key, st);
1621 		fprintf(lock_output, " ");
1622 	}
1623 
1624 	switch (aggr_mode) {
1625 	case LOCK_AGGR_CALLER:
1626 		fprintf(lock_output, "  %10s   %s\n", get_type_flags_name(st->flags), st->name);
1627 		break;
1628 	case LOCK_AGGR_TASK:
1629 		pid = st->addr;
1630 		t = perf_session__findnew(session, pid);
1631 		fprintf(lock_output, "  %10d   %s\n",
1632 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1633 		break;
1634 	case LOCK_AGGR_ADDR:
1635 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1636 			st->name, get_type_lock_name(st->flags));
1637 		break;
1638 	case LOCK_AGGR_CGROUP:
1639 		fprintf(lock_output, "  %s\n", st->name);
1640 		break;
1641 	default:
1642 		break;
1643 	}
1644 
1645 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1646 		struct map *kmap;
1647 		struct symbol *sym;
1648 		char buf[128];
1649 		u64 ip;
1650 
1651 		for (int i = 0; i < max_stack_depth; i++) {
1652 			if (!st->callstack || !st->callstack[i])
1653 				break;
1654 
1655 			ip = st->callstack[i];
1656 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1657 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1658 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1659 		}
1660 	}
1661 }
1662 
1663 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1664 				const char *sep)
1665 {
1666 	struct lock_key *key;
1667 	struct thread *t;
1668 	int pid;
1669 
1670 	list_for_each_entry(key, &lock_keys, list) {
1671 		key->print(key, st);
1672 		fprintf(lock_output, "%s ", sep);
1673 	}
1674 
1675 	switch (aggr_mode) {
1676 	case LOCK_AGGR_CALLER:
1677 		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
1678 		if (verbose <= 0)
1679 			fprintf(lock_output, "\n");
1680 		break;
1681 	case LOCK_AGGR_TASK:
1682 		pid = st->addr;
1683 		t = perf_session__findnew(session, pid);
1684 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1685 			pid == -1 ? "Unknown" : thread__comm_str(t));
1686 		break;
1687 	case LOCK_AGGR_ADDR:
1688 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1689 			st->name, sep, get_type_lock_name(st->flags));
1690 		break;
1691 	case LOCK_AGGR_CGROUP:
1692 		fprintf(lock_output, "%s\n",st->name);
1693 		break;
1694 	default:
1695 		break;
1696 	}
1697 
1698 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1699 		struct map *kmap;
1700 		struct symbol *sym;
1701 		char buf[128];
1702 		u64 ip;
1703 
1704 		for (int i = 0; i < max_stack_depth; i++) {
1705 			if (!st->callstack || !st->callstack[i])
1706 				break;
1707 
1708 			ip = st->callstack[i];
1709 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1710 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1711 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1712 		}
1713 		fprintf(lock_output, "\n");
1714 	}
1715 }
1716 
1717 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1718 {
1719 	if (symbol_conf.field_sep)
1720 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1721 	else
1722 		print_lock_stat_stdio(con, st);
1723 }
1724 
1725 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1726 {
1727 	/* Output for debug, this have to be removed */
1728 	int broken = fails->task + fails->stack + fails->time + fails->data;
1729 
1730 	if (!use_bpf)
1731 		print_bad_events(bad, total);
1732 
1733 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1734 		return;
1735 
1736 	total += broken;
1737 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1738 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1739 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1740 
1741 	fprintf(lock_output, "histogram of failure reasons\n");
1742 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1743 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1744 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1745 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1746 }
1747 
1748 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1749 			     const char *sep)
1750 {
1751 	/* Output for debug, this have to be removed */
1752 	if (use_bpf)
1753 		bad = fails->task + fails->stack + fails->time + fails->data;
1754 
1755 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1756 		return;
1757 
1758 	total += bad;
1759 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1760 
1761 	if (use_bpf) {
1762 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1763 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1764 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1765 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1766 	} else {
1767 		int i;
1768 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1769 
1770 		for (i = 0; i < BROKEN_MAX; i++)
1771 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1772 	}
1773 	fprintf(lock_output, "\n");
1774 }
1775 
1776 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1777 {
1778 	if (symbol_conf.field_sep)
1779 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1780 	else
1781 		print_footer_stdio(total, bad, fails);
1782 }
1783 
1784 static void print_contention_result(struct lock_contention *con)
1785 {
1786 	struct lock_stat *st;
1787 	int bad, total, printed;
1788 
1789 	if (!quiet)
1790 		print_header();
1791 
1792 	bad = total = printed = 0;
1793 
1794 	while ((st = pop_from_result())) {
1795 		total += use_bpf ? st->nr_contended : 1;
1796 		if (st->broken)
1797 			bad++;
1798 
1799 		if (!st->wait_time_total)
1800 			continue;
1801 
1802 		print_lock_stat(con, st);
1803 
1804 		if (++printed >= print_nr_entries)
1805 			break;
1806 	}
1807 
1808 	if (print_nr_entries) {
1809 		/* update the total/bad stats */
1810 		while ((st = pop_from_result())) {
1811 			total += use_bpf ? st->nr_contended : 1;
1812 			if (st->broken)
1813 				bad++;
1814 		}
1815 	}
1816 	/* some entries are collected but hidden by the callstack filter */
1817 	total += con->nr_filtered;
1818 
1819 	print_footer(total, bad, &con->fails);
1820 }
1821 
/* Copied into perf_data.force when the input data is opened for reading. */
static bool force;
1823 
1824 static int __cmd_report(bool display_info)
1825 {
1826 	int err = -EINVAL;
1827 	struct perf_tool eops;
1828 	struct perf_data data = {
1829 		.path  = input_name,
1830 		.mode  = PERF_DATA_MODE_READ,
1831 		.force = force,
1832 	};
1833 
1834 	perf_tool__init(&eops, /*ordered_events=*/true);
1835 	eops.attr		 = perf_event__process_attr;
1836 	eops.event_update	 = process_event_update;
1837 	eops.sample		 = process_sample_event;
1838 	eops.comm		 = perf_event__process_comm;
1839 	eops.mmap		 = perf_event__process_mmap;
1840 	eops.namespaces		 = perf_event__process_namespaces;
1841 	eops.tracing_data	 = perf_event__process_tracing_data;
1842 	session = perf_session__new(&data, &eops);
1843 	if (IS_ERR(session)) {
1844 		pr_err("Initializing perf session failed\n");
1845 		return PTR_ERR(session);
1846 	}
1847 
1848 	symbol_conf.allow_aliases = true;
1849 	symbol__init(&session->header.env);
1850 
1851 	if (!data.is_pipe) {
1852 		if (!perf_session__has_traces(session, "lock record"))
1853 			goto out_delete;
1854 
1855 		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1856 			pr_err("Initializing perf session tracepoint handlers failed\n");
1857 			goto out_delete;
1858 		}
1859 
1860 		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1861 			pr_err("Initializing perf session tracepoint handlers failed\n");
1862 			goto out_delete;
1863 		}
1864 	}
1865 
1866 	if (setup_output_field(false, output_fields))
1867 		goto out_delete;
1868 
1869 	if (select_key(false))
1870 		goto out_delete;
1871 
1872 	if (show_thread_stats)
1873 		aggr_mode = LOCK_AGGR_TASK;
1874 
1875 	err = perf_session__process_events(session);
1876 	if (err)
1877 		goto out_delete;
1878 
1879 	setup_pager();
1880 	if (display_info) /* used for info subcommand */
1881 		dump_info();
1882 	else {
1883 		combine_result();
1884 		sort_result();
1885 		print_result();
1886 	}
1887 
1888 out_delete:
1889 	perf_session__delete(session);
1890 	return err;
1891 }
1892 
/*
 * Deliberately empty signal handler: installing it only makes the signal
 * interrupt the pause() in __cmd_contention() instead of taking the default
 * action.
 */
static void sighandler(int sig __maybe_unused)
{
}
1896 
1897 static int check_lock_contention_options(const struct option *options,
1898 					 const char * const *usage)
1899 
1900 {
1901 	if (show_thread_stats && show_lock_addrs) {
1902 		pr_err("Cannot use thread and addr mode together\n");
1903 		parse_options_usage(usage, options, "threads", 0);
1904 		parse_options_usage(NULL, options, "lock-addr", 0);
1905 		return -1;
1906 	}
1907 
1908 	if (show_lock_owner && !use_bpf) {
1909 		pr_err("Lock owners are available only with BPF\n");
1910 		parse_options_usage(usage, options, "lock-owner", 0);
1911 		parse_options_usage(NULL, options, "use-bpf", 0);
1912 		return -1;
1913 	}
1914 
1915 	if (show_lock_owner && show_lock_addrs) {
1916 		pr_err("Cannot use owner and addr mode together\n");
1917 		parse_options_usage(usage, options, "lock-owner", 0);
1918 		parse_options_usage(NULL, options, "lock-addr", 0);
1919 		return -1;
1920 	}
1921 
1922 	if (show_lock_cgroups && !use_bpf) {
1923 		pr_err("Cgroups are available only with BPF\n");
1924 		parse_options_usage(usage, options, "lock-cgroup", 0);
1925 		parse_options_usage(NULL, options, "use-bpf", 0);
1926 		return -1;
1927 	}
1928 
1929 	if (show_lock_cgroups && show_lock_addrs) {
1930 		pr_err("Cannot use cgroup and addr mode together\n");
1931 		parse_options_usage(usage, options, "lock-cgroup", 0);
1932 		parse_options_usage(NULL, options, "lock-addr", 0);
1933 		return -1;
1934 	}
1935 
1936 	if (show_lock_cgroups && show_thread_stats) {
1937 		pr_err("Cannot use cgroup and thread mode together\n");
1938 		parse_options_usage(usage, options, "lock-cgroup", 0);
1939 		parse_options_usage(NULL, options, "threads", 0);
1940 		return -1;
1941 	}
1942 
1943 	if (symbol_conf.field_sep) {
1944 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1945 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1946 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1947 			pr_err("Cannot use the separator that is already used\n");
1948 			parse_options_usage(usage, options, "x", 1);
1949 			return -1;
1950 		}
1951 	}
1952 
1953 	if (show_lock_owner)
1954 		show_thread_stats = true;
1955 
1956 	return 0;
1957 }
1958 
/*
 * Run the contention subcommand, either live through BPF (use_bpf) or by
 * processing previously recorded data.
 *
 * @argc/@argv: optional workload to run while collecting with BPF.
 *
 * Returns 0 on success, -ENOMEM when an allocation fails, -EINVAL when the
 * recorded data lacks what is needed, or the error of the failing setup or
 * processing step.  All resources are released through the single
 * out_delete exit path.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
		.cgroups = RB_ROOT,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr		 = perf_event__process_attr;
	eops.event_update	 = process_event_update;
	eops.sample		 = process_sample_event;
	eops.comm		 = perf_event__process_comm;
	eops.mmap		 = perf_event__process_mmap;
	eops.tracing_data	 = perf_event__process_tracing_data;

	/* with BPF there is no input file, so the session is created without data */
	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		/* clear the error pointer before the common cleanup path sees it */
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	/* thread mode wins over addr mode, then cgroup mode; caller mode is the default */
	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR :
		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;

	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* these signals only need to interrupt the pause() below */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		err = lock_contention_prepare(&con);
		if (err < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		/* err is still -EINVAL for the failures in this branch */
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	err = setup_output_field(true, output_fields);
	if (err) {
		pr_err("Failed to setup output field\n");
		goto out_delete;
	}

	err = select_key(true);
	if (err)
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	/* common cleanup for both the success and the error paths */
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}
2114 
2115 
2116 static int __cmd_record(int argc, const char **argv)
2117 {
2118 	const char *record_args[] = {
2119 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2120 	};
2121 	const char *callgraph_args[] = {
2122 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2123 	};
2124 	unsigned int rec_argc, i, j, ret;
2125 	unsigned int nr_tracepoints;
2126 	unsigned int nr_callgraph_args = 0;
2127 	const char **rec_argv;
2128 	bool has_lock_stat = true;
2129 
2130 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2131 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2132 			pr_debug("tracepoint %s is not enabled. "
2133 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2134 				 lock_tracepoints[i].name);
2135 			has_lock_stat = false;
2136 			break;
2137 		}
2138 	}
2139 
2140 	if (has_lock_stat)
2141 		goto setup_args;
2142 
2143 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2144 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2145 			pr_err("tracepoint %s is not enabled.\n",
2146 			       contention_tracepoints[i].name);
2147 			return 1;
2148 		}
2149 	}
2150 
2151 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2152 
2153 setup_args:
2154 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2155 
2156 	if (has_lock_stat)
2157 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2158 	else
2159 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2160 
2161 	/* factor of 2 is for -e in front of each tracepoint */
2162 	rec_argc += 2 * nr_tracepoints;
2163 
2164 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2165 	if (!rec_argv)
2166 		return -ENOMEM;
2167 
2168 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2169 		rec_argv[i] = record_args[i];
2170 
2171 	for (j = 0; j < nr_tracepoints; j++) {
2172 		rec_argv[i++] = "-e";
2173 		rec_argv[i++] = has_lock_stat
2174 			? lock_tracepoints[j].name
2175 			: contention_tracepoints[j].name;
2176 	}
2177 
2178 	for (j = 0; j < nr_callgraph_args; j++, i++)
2179 		rec_argv[i] = callgraph_args[j];
2180 
2181 	for (j = 1; j < (unsigned int)argc; j++, i++)
2182 		rec_argv[i] = argv[j];
2183 
2184 	BUG_ON(i != rec_argc);
2185 
2186 	ret = cmd_record(i, rec_argv);
2187 	free(rec_argv);
2188 	return ret;
2189 }
2190 
2191 static int parse_map_entry(const struct option *opt, const char *str,
2192 			    int unset __maybe_unused)
2193 {
2194 	unsigned long *len = (unsigned long *)opt->value;
2195 	unsigned long val;
2196 	char *endptr;
2197 
2198 	errno = 0;
2199 	val = strtoul(str, &endptr, 0);
2200 	if (*endptr != '\0' || errno != 0) {
2201 		pr_err("invalid BPF map length: %s\n", str);
2202 		return -1;
2203 	}
2204 
2205 	*len = val;
2206 	return 0;
2207 }
2208 
2209 static int parse_max_stack(const struct option *opt, const char *str,
2210 			   int unset __maybe_unused)
2211 {
2212 	unsigned long *len = (unsigned long *)opt->value;
2213 	long val;
2214 	char *endptr;
2215 
2216 	errno = 0;
2217 	val = strtol(str, &endptr, 0);
2218 	if (*endptr != '\0' || errno != 0) {
2219 		pr_err("invalid max stack depth: %s\n", str);
2220 		return -1;
2221 	}
2222 
2223 	if (val < 0 || val > sysctl__max_stack()) {
2224 		pr_err("invalid max stack depth: %ld\n", val);
2225 		return -1;
2226 	}
2227 
2228 	*len = val;
2229 	return 0;
2230 }
2231 
2232 static bool add_lock_type(unsigned int flags)
2233 {
2234 	unsigned int *tmp;
2235 
2236 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2237 	if (tmp == NULL)
2238 		return false;
2239 
2240 	tmp[filters.nr_types++] = flags;
2241 	filters.types = tmp;
2242 	return true;
2243 }
2244 
2245 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2246 			   int unset __maybe_unused)
2247 {
2248 	char *s, *tmp, *tok;
2249 
2250 	s = strdup(str);
2251 	if (s == NULL)
2252 		return -1;
2253 
2254 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2255 		bool found = false;
2256 
2257 		/* `tok` is a flags name if it contains ':'. */
2258 		if (strchr(tok, ':')) {
2259 			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2260 				if (!strcmp(lock_type_table[i].flags_name, tok) &&
2261 				    add_lock_type(lock_type_table[i].flags)) {
2262 					found = true;
2263 					break;
2264 				}
2265 			}
2266 
2267 			if (!found) {
2268 				pr_err("Unknown lock flags name: %s\n", tok);
2269 				free(s);
2270 				return -1;
2271 			}
2272 
2273 			continue;
2274 		}
2275 
2276 		/*
2277 		 * Otherwise `tok` is a lock name.
2278 		 * Single lock name could contain multiple flags.
2279 		 * Replace alias `pcpu-sem` with actual name `percpu-rwsem.
2280 		 */
2281 		if (!strcmp(tok, "pcpu-sem"))
2282 			tok = (char *)"percpu-rwsem";
2283 		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2284 			if (!strcmp(lock_type_table[i].lock_name, tok)) {
2285 				if (add_lock_type(lock_type_table[i].flags)) {
2286 					found = true;
2287 				} else {
2288 					free(s);
2289 					return -1;
2290 				}
2291 			}
2292 		}
2293 
2294 		if (!found) {
2295 			pr_err("Unknown lock name: %s\n", tok);
2296 			free(s);
2297 			return -1;
2298 		}
2299 
2300 	}
2301 
2302 	free(s);
2303 	return 0;
2304 }
2305 
2306 static bool add_lock_addr(unsigned long addr)
2307 {
2308 	unsigned long *tmp;
2309 
2310 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2311 	if (tmp == NULL) {
2312 		pr_err("Memory allocation failure\n");
2313 		return false;
2314 	}
2315 
2316 	tmp[filters.nr_addrs++] = addr;
2317 	filters.addrs = tmp;
2318 	return true;
2319 }
2320 
2321 static bool add_lock_sym(char *name)
2322 {
2323 	char **tmp;
2324 	char *sym = strdup(name);
2325 
2326 	if (sym == NULL) {
2327 		pr_err("Memory allocation failure\n");
2328 		return false;
2329 	}
2330 
2331 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2332 	if (tmp == NULL) {
2333 		pr_err("Memory allocation failure\n");
2334 		free(sym);
2335 		return false;
2336 	}
2337 
2338 	tmp[filters.nr_syms++] = sym;
2339 	filters.syms = tmp;
2340 	return true;
2341 }
2342 
2343 static bool add_lock_slab(char *name)
2344 {
2345 	char **tmp;
2346 	char *sym = strdup(name);
2347 
2348 	if (sym == NULL) {
2349 		pr_err("Memory allocation failure\n");
2350 		return false;
2351 	}
2352 
2353 	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
2354 	if (tmp == NULL) {
2355 		pr_err("Memory allocation failure\n");
2356 		return false;
2357 	}
2358 
2359 	tmp[filters.nr_slabs++] = sym;
2360 	filters.slabs = tmp;
2361 	return true;
2362 }
2363 
2364 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2365 			   int unset __maybe_unused)
2366 {
2367 	char *s, *tmp, *tok;
2368 	int ret = 0;
2369 	u64 addr;
2370 
2371 	s = strdup(str);
2372 	if (s == NULL)
2373 		return -1;
2374 
2375 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2376 		char *end;
2377 
2378 		addr = strtoul(tok, &end, 16);
2379 		if (*end == '\0') {
2380 			if (!add_lock_addr(addr)) {
2381 				ret = -1;
2382 				break;
2383 			}
2384 			continue;
2385 		}
2386 
2387 		if (*tok == '&') {
2388 			if (!add_lock_slab(tok + 1)) {
2389 				ret = -1;
2390 				break;
2391 			}
2392 			continue;
2393 		}
2394 
2395 		/*
2396 		 * At this moment, we don't have kernel symbols.  Save the symbols
2397 		 * in a separate list and resolve them to addresses later.
2398 		 */
2399 		if (!add_lock_sym(tok)) {
2400 			ret = -1;
2401 			break;
2402 		}
2403 	}
2404 
2405 	free(s);
2406 	return ret;
2407 }
2408 
2409 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2410 			int unset __maybe_unused)
2411 {
2412 	const char **name = (const char **)opt->value;
2413 
2414 	if (str == NULL)
2415 		return -1;
2416 
2417 	lock_output = fopen(str, "w");
2418 	if (lock_output == NULL) {
2419 		pr_err("Cannot open %s\n", str);
2420 		return -1;
2421 	}
2422 
2423 	*name = str;
2424 	return 0;
2425 }
2426 
2427 static bool add_lock_cgroup(char *name)
2428 {
2429 	u64 *tmp;
2430 	struct cgroup *cgrp;
2431 
2432 	cgrp = cgroup__new(name, /*do_open=*/false);
2433 	if (cgrp == NULL) {
2434 		pr_err("Failed to create cgroup: %s\n", name);
2435 		return false;
2436 	}
2437 
2438 	if (read_cgroup_id(cgrp) < 0) {
2439 		pr_err("Failed to read cgroup id for %s\n", name);
2440 		cgroup__put(cgrp);
2441 		return false;
2442 	}
2443 
2444 	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
2445 	if (tmp == NULL) {
2446 		pr_err("Memory allocation failure\n");
2447 		return false;
2448 	}
2449 
2450 	tmp[filters.nr_cgrps++] = cgrp->id;
2451 	filters.cgrps = tmp;
2452 	cgroup__put(cgrp);
2453 	return true;
2454 }
2455 
2456 static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
2457 			       int unset __maybe_unused)
2458 {
2459 	char *s, *tmp, *tok;
2460 	int ret = 0;
2461 
2462 	s = strdup(str);
2463 	if (s == NULL)
2464 		return -1;
2465 
2466 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2467 		if (!add_lock_cgroup(tok)) {
2468 			ret = -1;
2469 			break;
2470 		}
2471 	}
2472 
2473 	free(s);
2474 	return ret;
2475 }
2476 
2477 int cmd_lock(int argc, const char **argv)
2478 {
2479 	const struct option lock_options[] = {
2480 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2481 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2482 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2483 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2484 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2485 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2486 		   "file", "vmlinux pathname"),
2487 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2488 		   "file", "kallsyms pathname"),
2489 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2490 	OPT_END()
2491 	};
2492 
2493 	const struct option info_options[] = {
2494 	OPT_BOOLEAN('t', "threads", &info_threads,
2495 		    "dump the thread list in perf.data"),
2496 	OPT_BOOLEAN('m', "map", &info_map,
2497 		    "dump the map of lock instances (address:name table)"),
2498 	OPT_PARENT(lock_options)
2499 	};
2500 
2501 	const struct option report_options[] = {
2502 	OPT_STRING('k', "key", &sort_key, "acquired",
2503 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2504 	OPT_STRING('F', "field", &output_fields, NULL,
2505 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2506 	/* TODO: type */
2507 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2508 		    "combine locks in the same class"),
2509 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2510 		    "show per-thread lock stats"),
2511 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2512 	OPT_PARENT(lock_options)
2513 	};
2514 
2515 	struct option contention_options[] = {
2516 	OPT_STRING('k', "key", &sort_key, "wait_total",
2517 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2518 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2519 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2520 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2521 		    "show per-thread lock stats"),
2522 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2523 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2524 		    "System-wide collection from all CPUs"),
2525 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2526 		    "List of cpus to monitor"),
2527 	OPT_STRING('p', "pid", &target.pid, "pid",
2528 		   "Trace on existing process id"),
2529 	OPT_STRING(0, "tid", &target.tid, "tid",
2530 		   "Trace on existing thread id (exclusive to --pid)"),
2531 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2532 		     "Max number of BPF map entries", parse_map_entry),
2533 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2534 		     "Set the maximum stack depth when collecting lock contention, "
2535 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2536 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2537 		    "Set the number of stack depth to skip when finding a lock caller, "
2538 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2539 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2540 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2541 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2542 		     "Filter specific type of locks", parse_lock_type),
2543 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2544 		     "Filter specific address/symbol of locks", parse_lock_addr),
2545 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2546 		     "Filter specific function in the callstack", parse_call_stack),
2547 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2548 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2549 		   "print result in CSV format with custom separator"),
2550 	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
2551 	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
2552 		     "Filter specific cgroups", parse_cgroup_filter),
2553 	OPT_PARENT(lock_options)
2554 	};
2555 
2556 	const char * const info_usage[] = {
2557 		"perf lock info [<options>]",
2558 		NULL
2559 	};
2560 	const char *const lock_subcommands[] = { "record", "report", "script",
2561 						 "info", "contention", NULL };
2562 	const char *lock_usage[] = {
2563 		NULL,
2564 		NULL
2565 	};
2566 	const char * const report_usage[] = {
2567 		"perf lock report [<options>]",
2568 		NULL
2569 	};
2570 	const char * const contention_usage[] = {
2571 		"perf lock contention [<options>]",
2572 		NULL
2573 	};
2574 	unsigned int i;
2575 	int rc = 0;
2576 
2577 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2578 	if (!lockhash_table)
2579 		return -ENOMEM;
2580 
2581 	for (i = 0; i < LOCKHASH_SIZE; i++)
2582 		INIT_HLIST_HEAD(lockhash_table + i);
2583 
2584 	lock_output = stderr;
2585 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2586 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2587 	if (!argc)
2588 		usage_with_options(lock_usage, lock_options);
2589 
2590 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2591 		return __cmd_record(argc, argv);
2592 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2593 		trace_handler = &report_lock_ops;
2594 		if (argc) {
2595 			argc = parse_options(argc, argv,
2596 					     report_options, report_usage, 0);
2597 			if (argc)
2598 				usage_with_options(report_usage, report_options);
2599 		}
2600 		rc = __cmd_report(false);
2601 	} else if (!strcmp(argv[0], "script")) {
2602 		/* Aliased to 'perf script' */
2603 		rc = cmd_script(argc, argv);
2604 	} else if (!strcmp(argv[0], "info")) {
2605 		if (argc) {
2606 			argc = parse_options(argc, argv,
2607 					     info_options, info_usage, 0);
2608 			if (argc)
2609 				usage_with_options(info_usage, info_options);
2610 		}
2611 
2612 		/* If neither threads nor map requested, display both */
2613 		if (!info_threads && !info_map) {
2614 			info_threads = true;
2615 			info_map = true;
2616 		}
2617 
2618 		/* recycling report_lock_ops */
2619 		trace_handler = &report_lock_ops;
2620 		rc = __cmd_report(true);
2621 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2622 		trace_handler = &contention_lock_ops;
2623 		sort_key = "wait_total";
2624 		output_fields = "contended,wait_total,wait_max,avg_wait";
2625 
2626 #ifndef HAVE_BPF_SKEL
2627 		set_option_nobuild(contention_options, 'b', "use-bpf",
2628 				   "no BUILD_BPF_SKEL=1", false);
2629 #endif
2630 		if (argc) {
2631 			argc = parse_options(argc, argv, contention_options,
2632 					     contention_usage, 0);
2633 		}
2634 
2635 		if (check_lock_contention_options(contention_options,
2636 						  contention_usage) < 0)
2637 			return -1;
2638 
2639 		rc = __cmd_contention(argc, argv);
2640 	} else {
2641 		usage_with_options(lock_usage, lock_options);
2642 	}
2643 
2644 	/* free usage string allocated by parse_options_subcommand */
2645 	free((void *)lock_usage[0]);
2646 
2647 	zfree(&lockhash_table);
2648 	return rc;
2649 }
2650