xref: /linux/tools/perf/builtin-kvm.c (revision 7ae811b12e419fd70b7d7159f20ed8519bbe18cc)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "builtin.h"
3 #include "perf.h"
4 
5 #include "util/build-id.h"
6 #include "util/evsel.h"
7 #include "util/evlist.h"
8 #include "util/term.h"
9 #include "util/cache.h"
10 #include "util/symbol.h"
11 #include "util/thread.h"
12 #include "util/header.h"
13 #include "util/session.h"
14 #include "util/intlist.h"
15 #include <subcmd/parse-options.h>
16 #include "util/trace-event.h"
17 #include "util/debug.h"
18 #include "util/tool.h"
19 #include "util/stat.h"
20 #include "util/top.h"
21 #include "util/data.h"
22 #include "util/ordered-events.h"
23 
24 #include <sys/prctl.h>
25 #ifdef HAVE_TIMERFD_SUPPORT
26 #include <sys/timerfd.h>
27 #endif
28 #include <sys/time.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 
33 #include <linux/kernel.h>
34 #include <linux/time64.h>
35 #include <linux/zalloc.h>
36 #include <errno.h>
37 #include <inttypes.h>
38 #include <poll.h>
39 #include <termios.h>
40 #include <semaphore.h>
41 #include <signal.h>
42 #include <math.h>
43 
44 static const char *get_filename_for_perf_kvm(void)
45 {
46 	const char *filename;
47 
48 	if (perf_host && !perf_guest)
49 		filename = strdup("perf.data.host");
50 	else if (!perf_host && perf_guest)
51 		filename = strdup("perf.data.guest");
52 	else
53 		filename = strdup("perf.data.kvm");
54 
55 	return filename;
56 }
57 
58 #ifdef HAVE_KVM_STAT_SUPPORT
59 #include "util/kvm-stat.h"
60 
61 void exit_event_get_key(struct evsel *evsel,
62 			struct perf_sample *sample,
63 			struct event_key *key)
64 {
65 	key->info = 0;
66 	key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
67 }
68 
69 bool kvm_exit_event(struct evsel *evsel)
70 {
71 	return !strcmp(evsel->name, kvm_exit_trace);
72 }
73 
/*
 * Begin-event predicate for exit events: on a kvm exit sample fill @key
 * and return true, otherwise leave @key untouched and return false.
 */
bool exit_event_begin(struct evsel *evsel,
		      struct perf_sample *sample, struct event_key *key)
{
	if (!kvm_exit_event(evsel))
		return false;

	exit_event_get_key(evsel, sample, key);
	return true;
}
84 
85 bool kvm_entry_event(struct evsel *evsel)
86 {
87 	return !strcmp(evsel->name, kvm_entry_trace);
88 }
89 
90 bool exit_event_end(struct evsel *evsel,
91 		    struct perf_sample *sample __maybe_unused,
92 		    struct event_key *key __maybe_unused)
93 {
94 	return kvm_entry_event(evsel);
95 }
96 
97 static const char *get_exit_reason(struct perf_kvm_stat *kvm,
98 				   struct exit_reasons_table *tbl,
99 				   u64 exit_code)
100 {
101 	while (tbl->reason != NULL) {
102 		if (tbl->exit_code == exit_code)
103 			return tbl->reason;
104 		tbl++;
105 	}
106 
107 	pr_err("unknown kvm exit code:%lld on %s\n",
108 		(unsigned long long)exit_code, kvm->exit_reasons_isa);
109 	return "UNKNOWN";
110 }
111 
112 void exit_event_decode_key(struct perf_kvm_stat *kvm,
113 			   struct event_key *key,
114 			   char *decode)
115 {
116 	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
117 						  key->key);
118 
119 	scnprintf(decode, decode_str_len, "%s", exit_reason);
120 }
121 
122 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
123 {
124 	struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;
125 
126 	for (events_ops = kvm_reg_events_ops; events_ops->name; events_ops++) {
127 		if (!strcmp(events_ops->name, kvm->report_event)) {
128 			kvm->events_ops = events_ops->ops;
129 			return true;
130 		}
131 	}
132 
133 	return false;
134 }
135 
136 struct vcpu_event_record {
137 	int vcpu_id;
138 	u64 start_time;
139 	struct kvm_event *last_event;
140 };
141 
142 
143 static void init_kvm_event_record(struct perf_kvm_stat *kvm)
144 {
145 	unsigned int i;
146 
147 	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
148 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
149 }
150 
151 #ifdef HAVE_TIMERFD_SUPPORT
152 static void clear_events_cache_stats(struct list_head *kvm_events_cache)
153 {
154 	struct list_head *head;
155 	struct kvm_event *event;
156 	unsigned int i;
157 	int j;
158 
159 	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
160 		head = &kvm_events_cache[i];
161 		list_for_each_entry(event, head, hash_entry) {
162 			/* reset stats for event */
163 			event->total.time = 0;
164 			init_stats(&event->total.stats);
165 
166 			for (j = 0; j < event->max_vcpu; ++j) {
167 				event->vcpu[j].time = 0;
168 				init_stats(&event->vcpu[j].stats);
169 			}
170 		}
171 	}
172 }
173 #endif
174 
175 static int kvm_events_hash_fn(u64 key)
176 {
177 	return key & (EVENTS_CACHE_SIZE - 1);
178 }
179 
180 static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
181 {
182 	int old_max_vcpu = event->max_vcpu;
183 	void *prev;
184 
185 	if (vcpu_id < event->max_vcpu)
186 		return true;
187 
188 	while (event->max_vcpu <= vcpu_id)
189 		event->max_vcpu += DEFAULT_VCPU_NUM;
190 
191 	prev = event->vcpu;
192 	event->vcpu = realloc(event->vcpu,
193 			      event->max_vcpu * sizeof(*event->vcpu));
194 	if (!event->vcpu) {
195 		free(prev);
196 		pr_err("Not enough memory\n");
197 		return false;
198 	}
199 
200 	memset(event->vcpu + old_max_vcpu, 0,
201 	       (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
202 	return true;
203 }
204 
205 static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
206 {
207 	struct kvm_event *event;
208 
209 	event = zalloc(sizeof(*event));
210 	if (!event) {
211 		pr_err("Not enough memory\n");
212 		return NULL;
213 	}
214 
215 	event->key = *key;
216 	init_stats(&event->total.stats);
217 	return event;
218 }
219 
220 static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
221 					       struct event_key *key)
222 {
223 	struct kvm_event *event;
224 	struct list_head *head;
225 
226 	BUG_ON(key->key == INVALID_KEY);
227 
228 	head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
229 	list_for_each_entry(event, head, hash_entry) {
230 		if (event->key.key == key->key && event->key.info == key->info)
231 			return event;
232 	}
233 
234 	event = kvm_alloc_init_event(key);
235 	if (!event)
236 		return NULL;
237 
238 	list_add(&event->hash_entry, head);
239 	return event;
240 }
241 
242 static bool handle_begin_event(struct perf_kvm_stat *kvm,
243 			       struct vcpu_event_record *vcpu_record,
244 			       struct event_key *key, u64 timestamp)
245 {
246 	struct kvm_event *event = NULL;
247 
248 	if (key->key != INVALID_KEY)
249 		event = find_create_kvm_event(kvm, key);
250 
251 	vcpu_record->last_event = event;
252 	vcpu_record->start_time = timestamp;
253 	return true;
254 }
255 
256 static void
257 kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
258 {
259 	kvm_stats->time += time_diff;
260 	update_stats(&kvm_stats->stats, time_diff);
261 }
262 
263 static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
264 {
265 	struct kvm_event_stats *kvm_stats = &event->total;
266 
267 	if (vcpu_id != -1)
268 		kvm_stats = &event->vcpu[vcpu_id];
269 
270 	return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
271 				avg_stats(&kvm_stats->stats));
272 }
273 
274 static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
275 			     u64 time_diff)
276 {
277 	if (vcpu_id == -1) {
278 		kvm_update_event_stats(&event->total, time_diff);
279 		return true;
280 	}
281 
282 	if (!kvm_event_expand(event, vcpu_id))
283 		return false;
284 
285 	kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
286 	return true;
287 }
288 
289 static bool is_child_event(struct perf_kvm_stat *kvm,
290 			   struct evsel *evsel,
291 			   struct perf_sample *sample,
292 			   struct event_key *key)
293 {
294 	struct child_event_ops *child_ops;
295 
296 	child_ops = kvm->events_ops->child_ops;
297 
298 	if (!child_ops)
299 		return false;
300 
301 	for (; child_ops->name; child_ops++) {
302 		if (!strcmp(evsel->name, child_ops->name)) {
303 			child_ops->get_key(evsel, sample, key);
304 			return true;
305 		}
306 	}
307 
308 	return false;
309 }
310 
311 static bool handle_child_event(struct perf_kvm_stat *kvm,
312 			       struct vcpu_event_record *vcpu_record,
313 			       struct event_key *key,
314 			       struct perf_sample *sample __maybe_unused)
315 {
316 	struct kvm_event *event = NULL;
317 
318 	if (key->key != INVALID_KEY)
319 		event = find_create_kvm_event(kvm, key);
320 
321 	vcpu_record->last_event = event;
322 
323 	return true;
324 }
325 
326 static bool skip_event(const char *event)
327 {
328 	const char * const *skip_events;
329 
330 	for (skip_events = kvm_skip_events; *skip_events; skip_events++)
331 		if (!strcmp(event, *skip_events))
332 			return true;
333 
334 	return false;
335 }
336 
337 static bool handle_end_event(struct perf_kvm_stat *kvm,
338 			     struct vcpu_event_record *vcpu_record,
339 			     struct event_key *key,
340 			     struct perf_sample *sample)
341 {
342 	struct kvm_event *event;
343 	u64 time_begin, time_diff;
344 	int vcpu;
345 
346 	if (kvm->trace_vcpu == -1)
347 		vcpu = -1;
348 	else
349 		vcpu = vcpu_record->vcpu_id;
350 
351 	event = vcpu_record->last_event;
352 	time_begin = vcpu_record->start_time;
353 
354 	/* The begin event is not caught. */
355 	if (!time_begin)
356 		return true;
357 
358 	/*
359 	 * In some case, the 'begin event' only records the start timestamp,
360 	 * the actual event is recognized in the 'end event' (e.g. mmio-event).
361 	 */
362 
363 	/* Both begin and end events did not get the key. */
364 	if (!event && key->key == INVALID_KEY)
365 		return true;
366 
367 	if (!event)
368 		event = find_create_kvm_event(kvm, key);
369 
370 	if (!event)
371 		return false;
372 
373 	vcpu_record->last_event = NULL;
374 	vcpu_record->start_time = 0;
375 
376 	/* seems to happen once in a while during live mode */
377 	if (sample->time < time_begin) {
378 		pr_debug("End time before begin time; skipping event.\n");
379 		return true;
380 	}
381 
382 	time_diff = sample->time - time_begin;
383 
384 	if (kvm->duration && time_diff > kvm->duration) {
385 		char decode[decode_str_len];
386 
387 		kvm->events_ops->decode_key(kvm, &event->key, decode);
388 		if (!skip_event(decode)) {
389 			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
390 				 sample->time, sample->pid, vcpu_record->vcpu_id,
391 				 decode, time_diff / NSEC_PER_USEC);
392 		}
393 	}
394 
395 	return update_kvm_event(event, vcpu, time_diff);
396 }
397 
398 static
399 struct vcpu_event_record *per_vcpu_record(struct thread *thread,
400 					  struct evsel *evsel,
401 					  struct perf_sample *sample)
402 {
403 	/* Only kvm_entry records vcpu id. */
404 	if (!thread__priv(thread) && kvm_entry_event(evsel)) {
405 		struct vcpu_event_record *vcpu_record;
406 
407 		vcpu_record = zalloc(sizeof(*vcpu_record));
408 		if (!vcpu_record) {
409 			pr_err("%s: Not enough memory\n", __func__);
410 			return NULL;
411 		}
412 
413 		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
414 							  vcpu_id_str);
415 		thread__set_priv(thread, vcpu_record);
416 	}
417 
418 	return thread__priv(thread);
419 }
420 
421 static bool handle_kvm_event(struct perf_kvm_stat *kvm,
422 			     struct thread *thread,
423 			     struct evsel *evsel,
424 			     struct perf_sample *sample)
425 {
426 	struct vcpu_event_record *vcpu_record;
427 	struct event_key key = { .key = INVALID_KEY,
428 				 .exit_reasons = kvm->exit_reasons };
429 
430 	vcpu_record = per_vcpu_record(thread, evsel, sample);
431 	if (!vcpu_record)
432 		return true;
433 
434 	/* only process events for vcpus user cares about */
435 	if ((kvm->trace_vcpu != -1) &&
436 	    (kvm->trace_vcpu != vcpu_record->vcpu_id))
437 		return true;
438 
439 	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
440 		return handle_begin_event(kvm, vcpu_record, &key, sample->time);
441 
442 	if (is_child_event(kvm, evsel, sample, &key))
443 		return handle_child_event(kvm, vcpu_record, &key, sample);
444 
445 	if (kvm->events_ops->is_end_event(evsel, sample, &key))
446 		return handle_end_event(kvm, vcpu_record, &key, sample);
447 
448 	return true;
449 }
450 
451 #define GET_EVENT_KEY(func, field)					\
452 static u64 get_event_ ##func(struct kvm_event *event, int vcpu)		\
453 {									\
454 	if (vcpu == -1)							\
455 		return event->total.field;				\
456 									\
457 	if (vcpu >= event->max_vcpu)					\
458 		return 0;						\
459 									\
460 	return event->vcpu[vcpu].field;					\
461 }
462 
463 #define COMPARE_EVENT_KEY(func, field)					\
464 GET_EVENT_KEY(func, field)						\
465 static int compare_kvm_event_ ## func(struct kvm_event *one,		\
466 					struct kvm_event *two, int vcpu)\
467 {									\
468 	return get_event_ ##func(one, vcpu) >				\
469 				get_event_ ##func(two, vcpu);		\
470 }
471 
472 GET_EVENT_KEY(time, time);
473 COMPARE_EVENT_KEY(count, stats.n);
474 COMPARE_EVENT_KEY(mean, stats.mean);
475 GET_EVENT_KEY(max, stats.max);
476 GET_EVENT_KEY(min, stats.min);
477 
478 #define DEF_SORT_NAME_KEY(name, compare_key)				\
479 	{ #name, compare_kvm_event_ ## compare_key }
480 
481 static struct kvm_event_key keys[] = {
482 	DEF_SORT_NAME_KEY(sample, count),
483 	DEF_SORT_NAME_KEY(time, mean),
484 	{ NULL, NULL }
485 };
486 
487 static bool select_key(struct perf_kvm_stat *kvm)
488 {
489 	int i;
490 
491 	for (i = 0; keys[i].name; i++) {
492 		if (!strcmp(keys[i].name, kvm->sort_key)) {
493 			kvm->compare = keys[i].key;
494 			return true;
495 		}
496 	}
497 
498 	pr_err("Unknown compare key:%s\n", kvm->sort_key);
499 	return false;
500 }
501 
502 static void insert_to_result(struct rb_root *result, struct kvm_event *event,
503 			     key_cmp_fun bigger, int vcpu)
504 {
505 	struct rb_node **rb = &result->rb_node;
506 	struct rb_node *parent = NULL;
507 	struct kvm_event *p;
508 
509 	while (*rb) {
510 		p = container_of(*rb, struct kvm_event, rb);
511 		parent = *rb;
512 
513 		if (bigger(event, p, vcpu))
514 			rb = &(*rb)->rb_left;
515 		else
516 			rb = &(*rb)->rb_right;
517 	}
518 
519 	rb_link_node(&event->rb, parent, rb);
520 	rb_insert_color(&event->rb, result);
521 }
522 
523 static void
524 update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
525 {
526 	int vcpu = kvm->trace_vcpu;
527 
528 	kvm->total_count += get_event_count(event, vcpu);
529 	kvm->total_time += get_event_time(event, vcpu);
530 }
531 
/* An event is worth reporting when it has at least one sample for @vcpu. */
static bool event_is_valid(struct kvm_event *event, int vcpu)
{
	return get_event_count(event, vcpu) != 0;
}
536 
537 static void sort_result(struct perf_kvm_stat *kvm)
538 {
539 	unsigned int i;
540 	int vcpu = kvm->trace_vcpu;
541 	struct kvm_event *event;
542 
543 	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
544 		list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
545 			if (event_is_valid(event, vcpu)) {
546 				update_total_count(kvm, event);
547 				insert_to_result(&kvm->result, event,
548 						 kvm->compare, vcpu);
549 			}
550 		}
551 	}
552 }
553 
554 /* returns left most element of result, and erase it */
555 static struct kvm_event *pop_from_result(struct rb_root *result)
556 {
557 	struct rb_node *node = rb_first(result);
558 
559 	if (!node)
560 		return NULL;
561 
562 	rb_erase(node, result);
563 	return container_of(node, struct kvm_event, rb);
564 }
565 
566 static void print_vcpu_info(struct perf_kvm_stat *kvm)
567 {
568 	int vcpu = kvm->trace_vcpu;
569 
570 	pr_info("Analyze events for ");
571 
572 	if (kvm->opts.target.system_wide)
573 		pr_info("all VMs, ");
574 	else if (kvm->opts.target.pid)
575 		pr_info("pid(s) %s, ", kvm->opts.target.pid);
576 	else
577 		pr_info("dazed and confused on what is monitored, ");
578 
579 	if (vcpu == -1)
580 		pr_info("all VCPUs:\n\n");
581 	else
582 		pr_info("VCPU %d:\n\n", vcpu);
583 }
584 
/*
 * Print the current local time as HH:MM:SS.uuuuuu, or an all-zero
 * timestamp when the time cannot be broken down into local time.
 */
static void show_timeofday(void)
{
	char date[64];
	struct timeval tv;
	struct tm ltime;

	gettimeofday(&tv, NULL);
	if (!localtime_r(&tv.tv_sec, &ltime)) {
		pr_info("00:00:00.000000");
		return;
	}

	strftime(date, sizeof(date), "%H:%M:%S", &ltime);
	/* tv_usec is a suseconds_t, not necessarily a long: cast to match %ld */
	pr_info("%s.%06ld", date, (long)tv.tv_usec);
}
600 
601 static void print_result(struct perf_kvm_stat *kvm)
602 {
603 	char decode[decode_str_len];
604 	struct kvm_event *event;
605 	int vcpu = kvm->trace_vcpu;
606 
607 	if (kvm->live) {
608 		puts(CONSOLE_CLEAR);
609 		show_timeofday();
610 	}
611 
612 	pr_info("\n\n");
613 	print_vcpu_info(kvm);
614 	pr_info("%*s ", decode_str_len, kvm->events_ops->name);
615 	pr_info("%10s ", "Samples");
616 	pr_info("%9s ", "Samples%");
617 
618 	pr_info("%9s ", "Time%");
619 	pr_info("%11s ", "Min Time");
620 	pr_info("%11s ", "Max Time");
621 	pr_info("%16s ", "Avg time");
622 	pr_info("\n\n");
623 
624 	while ((event = pop_from_result(&kvm->result))) {
625 		u64 ecount, etime, max, min;
626 
627 		ecount = get_event_count(event, vcpu);
628 		etime = get_event_time(event, vcpu);
629 		max = get_event_max(event, vcpu);
630 		min = get_event_min(event, vcpu);
631 
632 		kvm->events_ops->decode_key(kvm, &event->key, decode);
633 		pr_info("%*s ", decode_str_len, decode);
634 		pr_info("%10llu ", (unsigned long long)ecount);
635 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
636 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
637 		pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
638 		pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
639 		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
640 			kvm_event_rel_stddev(vcpu, event));
641 		pr_info("\n");
642 	}
643 
644 	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
645 		kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
646 
647 	if (kvm->lost_events)
648 		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
649 }
650 
651 #ifdef HAVE_TIMERFD_SUPPORT
652 static int process_lost_event(struct perf_tool *tool,
653 			      union perf_event *event __maybe_unused,
654 			      struct perf_sample *sample __maybe_unused,
655 			      struct machine *machine __maybe_unused)
656 {
657 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
658 
659 	kvm->lost_events++;
660 	return 0;
661 }
662 #endif
663 
664 static bool skip_sample(struct perf_kvm_stat *kvm,
665 			struct perf_sample *sample)
666 {
667 	if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
668 		return true;
669 
670 	return false;
671 }
672 
673 static int process_sample_event(struct perf_tool *tool,
674 				union perf_event *event,
675 				struct perf_sample *sample,
676 				struct evsel *evsel,
677 				struct machine *machine)
678 {
679 	int err = 0;
680 	struct thread *thread;
681 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
682 						 tool);
683 
684 	if (skip_sample(kvm, sample))
685 		return 0;
686 
687 	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
688 	if (thread == NULL) {
689 		pr_debug("problem processing %d event, skipping it.\n",
690 			event->header.type);
691 		return -1;
692 	}
693 
694 	if (!handle_kvm_event(kvm, thread, evsel, sample))
695 		err = -1;
696 
697 	thread__put(thread);
698 	return err;
699 }
700 
701 static int cpu_isa_config(struct perf_kvm_stat *kvm)
702 {
703 	char buf[64], *cpuid;
704 	int err;
705 
706 	if (kvm->live) {
707 		err = get_cpuid(buf, sizeof(buf));
708 		if (err != 0) {
709 			pr_err("Failed to look up CPU type\n");
710 			return err;
711 		}
712 		cpuid = buf;
713 	} else
714 		cpuid = kvm->session->header.env.cpuid;
715 
716 	if (!cpuid) {
717 		pr_err("Failed to look up CPU type\n");
718 		return -EINVAL;
719 	}
720 
721 	err = cpu_isa_init(kvm, cpuid);
722 	if (err == -ENOTSUP)
723 		pr_err("CPU %s is not supported.\n", cpuid);
724 
725 	return err;
726 }
727 
/*
 * Accept -1 (meaning "all vcpus") and any non-negative vcpu number;
 * anything below -1 is reported and rejected.
 */
static bool verify_vcpu(int vcpu)
{
	if (vcpu >= -1)
		return true;

	pr_err("Invalid vcpu:%d.\n", vcpu);
	return false;
}
737 
738 #ifdef HAVE_TIMERFD_SUPPORT
739 /* keeping the max events to a modest level to keep
740  * the processing of samples per mmap smooth.
741  */
742 #define PERF_KVM__MAX_EVENTS_PER_MMAP  25
743 
744 static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
745 				   u64 *mmap_time)
746 {
747 	struct evlist *evlist = kvm->evlist;
748 	union perf_event *event;
749 	struct perf_mmap *md;
750 	u64 timestamp;
751 	s64 n = 0;
752 	int err;
753 
754 	*mmap_time = ULLONG_MAX;
755 	md = &evlist->mmap[idx];
756 	err = perf_mmap__read_init(md);
757 	if (err < 0)
758 		return (err == -EAGAIN) ? 0 : -1;
759 
760 	while ((event = perf_mmap__read_event(md)) != NULL) {
761 		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
762 		if (err) {
763 			perf_mmap__consume(md);
764 			pr_err("Failed to parse sample\n");
765 			return -1;
766 		}
767 
768 		err = perf_session__queue_event(kvm->session, event, timestamp, 0);
769 		/*
770 		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
771 		 *        point to it, and it'll get possibly overwritten by the kernel.
772 		 */
773 		perf_mmap__consume(md);
774 
775 		if (err) {
776 			pr_err("Failed to enqueue sample: %d\n", err);
777 			return -1;
778 		}
779 
780 		/* save time stamp of our first sample for this mmap */
781 		if (n == 0)
782 			*mmap_time = timestamp;
783 
784 		/* limit events per mmap handled all at once */
785 		n++;
786 		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
787 			break;
788 	}
789 
790 	perf_mmap__read_done(md);
791 	return n;
792 }
793 
794 static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
795 {
796 	int i, err, throttled = 0;
797 	s64 n, ntotal = 0;
798 	u64 flush_time = ULLONG_MAX, mmap_time;
799 
800 	for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
801 		n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
802 		if (n < 0)
803 			return -1;
804 
805 		/* flush time is going to be the minimum of all the individual
806 		 * mmap times. Essentially, we flush all the samples queued up
807 		 * from the last pass under our minimal start time -- that leaves
808 		 * a very small race for samples to come in with a lower timestamp.
809 		 * The ioctl to return the perf_clock timestamp should close the
810 		 * race entirely.
811 		 */
812 		if (mmap_time < flush_time)
813 			flush_time = mmap_time;
814 
815 		ntotal += n;
816 		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
817 			throttled = 1;
818 	}
819 
820 	/* flush queue after each round in which we processed events */
821 	if (ntotal) {
822 		struct ordered_events *oe = &kvm->session->ordered_events;
823 
824 		oe->next_flush = flush_time;
825 		err = ordered_events__flush(oe, OE_FLUSH__ROUND);
826 		if (err) {
827 			if (kvm->lost_events)
828 				pr_info("\nLost events: %" PRIu64 "\n\n",
829 					kvm->lost_events);
830 			return err;
831 		}
832 	}
833 
834 	return throttled;
835 }
836 
837 static volatile int done;
838 
839 static void sig_handler(int sig __maybe_unused)
840 {
841 	done = 1;
842 }
843 
844 static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
845 {
846 	struct itimerspec new_value;
847 	int rc = -1;
848 
849 	kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
850 	if (kvm->timerfd < 0) {
851 		pr_err("timerfd_create failed\n");
852 		goto out;
853 	}
854 
855 	new_value.it_value.tv_sec = kvm->display_time;
856 	new_value.it_value.tv_nsec = 0;
857 	new_value.it_interval.tv_sec = kvm->display_time;
858 	new_value.it_interval.tv_nsec = 0;
859 
860 	if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
861 		pr_err("timerfd_settime failed: %d\n", errno);
862 		close(kvm->timerfd);
863 		goto out;
864 	}
865 
866 	rc = 0;
867 out:
868 	return rc;
869 }
870 
871 static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
872 {
873 	uint64_t c;
874 	int rc;
875 
876 	rc = read(kvm->timerfd, &c, sizeof(uint64_t));
877 	if (rc < 0) {
878 		if (errno == EAGAIN)
879 			return 0;
880 
881 		pr_err("Failed to read timer fd: %d\n", errno);
882 		return -1;
883 	}
884 
885 	if (rc != sizeof(uint64_t)) {
886 		pr_err("Error reading timer fd - invalid size returned\n");
887 		return -1;
888 	}
889 
890 	if (c != 1)
891 		pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
892 
893 	/* update display */
894 	sort_result(kvm);
895 	print_result(kvm);
896 
897 	/* reset counts */
898 	clear_events_cache_stats(kvm->kvm_events_cache);
899 	kvm->total_count = 0;
900 	kvm->total_time = 0;
901 	kvm->lost_events = 0;
902 
903 	return 0;
904 }
905 
/*
 * Add O_NONBLOCK to the file status flags of @fd.
 * Returns 0 on success, -1 (after logging) when either fcntl() call fails.
 */
static int fd_set_nonblock(int fd)
{
	/* fcntl() returns an int, and F_SETFL takes an int argument */
	int flags = fcntl(fd, F_GETFL);

	if (flags < 0) {
		pr_err("Failed to get current flags for fd %d\n", fd);
		return -1;
	}

	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
		pr_err("Failed to set non-block option on fd %d\n", fd);
		return -1;
	}

	return 0;
}
923 
/*
 * Read one character from stdin.
 * Returns 1 when it is 'q' (quit request), 0 for anything else.
 */
static int perf_kvm__handle_stdin(void)
{
	return getc(stdin) == 'q' ? 1 : 0;
}
934 
935 static int kvm_events_live_report(struct perf_kvm_stat *kvm)
936 {
937 	int nr_stdin, ret, err = -EINVAL;
938 	struct termios save;
939 
940 	/* live flag must be set first */
941 	kvm->live = true;
942 
943 	ret = cpu_isa_config(kvm);
944 	if (ret < 0)
945 		return ret;
946 
947 	if (!verify_vcpu(kvm->trace_vcpu) ||
948 	    !select_key(kvm) ||
949 	    !register_kvm_events_ops(kvm)) {
950 		goto out;
951 	}
952 
953 	set_term_quiet_input(&save);
954 	init_kvm_event_record(kvm);
955 
956 	signal(SIGINT, sig_handler);
957 	signal(SIGTERM, sig_handler);
958 
959 	/* add timer fd */
960 	if (perf_kvm__timerfd_create(kvm) < 0) {
961 		err = -1;
962 		goto out;
963 	}
964 
965 	if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
966 		goto out;
967 
968 	nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
969 	if (nr_stdin < 0)
970 		goto out;
971 
972 	if (fd_set_nonblock(fileno(stdin)) != 0)
973 		goto out;
974 
975 	/* everything is good - enable the events and process */
976 	evlist__enable(kvm->evlist);
977 
978 	while (!done) {
979 		struct fdarray *fda = &kvm->evlist->pollfd;
980 		int rc;
981 
982 		rc = perf_kvm__mmap_read(kvm);
983 		if (rc < 0)
984 			break;
985 
986 		err = perf_kvm__handle_timerfd(kvm);
987 		if (err)
988 			goto out;
989 
990 		if (fda->entries[nr_stdin].revents & POLLIN)
991 			done = perf_kvm__handle_stdin();
992 
993 		if (!rc && !done)
994 			err = fdarray__poll(fda, 100);
995 	}
996 
997 	evlist__disable(kvm->evlist);
998 
999 	if (err == 0) {
1000 		sort_result(kvm);
1001 		print_result(kvm);
1002 	}
1003 
1004 out:
1005 	if (kvm->timerfd >= 0)
1006 		close(kvm->timerfd);
1007 
1008 	tcsetattr(0, TCSAFLUSH, &save);
1009 	return err;
1010 }
1011 
1012 static int kvm_live_open_events(struct perf_kvm_stat *kvm)
1013 {
1014 	int err, rc = -1;
1015 	struct evsel *pos;
1016 	struct evlist *evlist = kvm->evlist;
1017 	char sbuf[STRERR_BUFSIZE];
1018 
1019 	perf_evlist__config(evlist, &kvm->opts, NULL);
1020 
1021 	/*
1022 	 * Note: exclude_{guest,host} do not apply here.
1023 	 *       This command processes KVM tracepoints from host only
1024 	 */
1025 	evlist__for_each_entry(evlist, pos) {
1026 		struct perf_event_attr *attr = &pos->core.attr;
1027 
1028 		/* make sure these *are* set */
1029 		perf_evsel__set_sample_bit(pos, TID);
1030 		perf_evsel__set_sample_bit(pos, TIME);
1031 		perf_evsel__set_sample_bit(pos, CPU);
1032 		perf_evsel__set_sample_bit(pos, RAW);
1033 		/* make sure these are *not*; want as small a sample as possible */
1034 		perf_evsel__reset_sample_bit(pos, PERIOD);
1035 		perf_evsel__reset_sample_bit(pos, IP);
1036 		perf_evsel__reset_sample_bit(pos, CALLCHAIN);
1037 		perf_evsel__reset_sample_bit(pos, ADDR);
1038 		perf_evsel__reset_sample_bit(pos, READ);
1039 		attr->mmap = 0;
1040 		attr->comm = 0;
1041 		attr->task = 0;
1042 
1043 		attr->sample_period = 1;
1044 
1045 		attr->watermark = 0;
1046 		attr->wakeup_events = 1000;
1047 
1048 		/* will enable all once we are ready */
1049 		attr->disabled = 1;
1050 	}
1051 
1052 	err = evlist__open(evlist);
1053 	if (err < 0) {
1054 		printf("Couldn't create the events: %s\n",
1055 		       str_error_r(errno, sbuf, sizeof(sbuf)));
1056 		goto out;
1057 	}
1058 
1059 	if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
1060 		ui__error("Failed to mmap the events: %s\n",
1061 			  str_error_r(errno, sbuf, sizeof(sbuf)));
1062 		evlist__close(evlist);
1063 		goto out;
1064 	}
1065 
1066 	rc = 0;
1067 
1068 out:
1069 	return rc;
1070 }
1071 #endif
1072 
1073 static int read_events(struct perf_kvm_stat *kvm)
1074 {
1075 	int ret;
1076 
1077 	struct perf_tool eops = {
1078 		.sample			= process_sample_event,
1079 		.comm			= perf_event__process_comm,
1080 		.namespaces		= perf_event__process_namespaces,
1081 		.ordered_events		= true,
1082 	};
1083 	struct perf_data file = {
1084 		.path  = kvm->file_name,
1085 		.mode  = PERF_DATA_MODE_READ,
1086 		.force = kvm->force,
1087 	};
1088 
1089 	kvm->tool = eops;
1090 	kvm->session = perf_session__new(&file, false, &kvm->tool);
1091 	if (!kvm->session) {
1092 		pr_err("Initializing perf session failed\n");
1093 		return -1;
1094 	}
1095 
1096 	symbol__init(&kvm->session->header.env);
1097 
1098 	if (!perf_session__has_traces(kvm->session, "kvm record")) {
1099 		ret = -EINVAL;
1100 		goto out_delete;
1101 	}
1102 
1103 	/*
1104 	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
1105 	 * traced in the old kernel.
1106 	 */
1107 	ret = cpu_isa_config(kvm);
1108 	if (ret < 0)
1109 		goto out_delete;
1110 
1111 	ret = perf_session__process_events(kvm->session);
1112 
1113 out_delete:
1114 	perf_session__delete(kvm->session);
1115 	return ret;
1116 }
1117 
1118 static int parse_target_str(struct perf_kvm_stat *kvm)
1119 {
1120 	if (kvm->opts.target.pid) {
1121 		kvm->pid_list = intlist__new(kvm->opts.target.pid);
1122 		if (kvm->pid_list == NULL) {
1123 			pr_err("Error parsing process id string\n");
1124 			return -EINVAL;
1125 		}
1126 	}
1127 
1128 	return 0;
1129 }
1130 
1131 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
1132 {
1133 	int ret = -EINVAL;
1134 	int vcpu = kvm->trace_vcpu;
1135 
1136 	if (parse_target_str(kvm) != 0)
1137 		goto exit;
1138 
1139 	if (!verify_vcpu(vcpu))
1140 		goto exit;
1141 
1142 	if (!select_key(kvm))
1143 		goto exit;
1144 
1145 	if (!register_kvm_events_ops(kvm))
1146 		goto exit;
1147 
1148 	init_kvm_event_record(kvm);
1149 	setup_pager();
1150 
1151 	ret = read_events(kvm);
1152 	if (ret)
1153 		goto exit;
1154 
1155 	sort_result(kvm);
1156 	print_result(kvm);
1157 
1158 exit:
1159 	return ret;
1160 }
1161 
/*
 * Evaluate to a strdup()ed copy of @s; when the copy cannot be allocated,
 * return -ENOMEM from the *enclosing* function.
 */
#define STRDUP_FAIL_EXIT(s)		\
	({	char *_p = strdup(s);	\
					\
		if (_p == NULL)		\
			return -ENOMEM;	\
		_p;			\
	})
1169 
1170 int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
1171 {
1172 	return 0;
1173 }
1174 
1175 static int
1176 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
1177 {
1178 	unsigned int rec_argc, i, j, events_tp_size;
1179 	const char **rec_argv;
1180 	const char * const record_args[] = {
1181 		"record",
1182 		"-R",
1183 		"-m", "1024",
1184 		"-c", "1",
1185 	};
1186 	const char * const kvm_stat_record_usage[] = {
1187 		"perf kvm stat record [<options>]",
1188 		NULL
1189 	};
1190 	const char * const *events_tp;
1191 	int ret;
1192 
1193 	events_tp_size = 0;
1194 	ret = setup_kvm_events_tp(kvm);
1195 	if (ret < 0) {
1196 		pr_err("Unable to setup the kvm tracepoints\n");
1197 		return ret;
1198 	}
1199 
1200 	for (events_tp = kvm_events_tp; *events_tp; events_tp++)
1201 		events_tp_size++;
1202 
1203 	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
1204 		   2 * events_tp_size;
1205 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1206 
1207 	if (rec_argv == NULL)
1208 		return -ENOMEM;
1209 
1210 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1211 		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
1212 
1213 	for (j = 0; j < events_tp_size; j++) {
1214 		rec_argv[i++] = "-e";
1215 		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
1216 	}
1217 
1218 	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
1219 	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
1220 
1221 	for (j = 1; j < (unsigned int)argc; j++, i++)
1222 		rec_argv[i] = argv[j];
1223 
1224 	set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
1225 	set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
1226 	set_option_flag(record_options, 'R', "raw-samples", PARSE_OPT_HIDDEN);
1227 
1228 	set_option_flag(record_options, 'F', "freq", PARSE_OPT_DISABLED);
1229 	set_option_flag(record_options, 0, "group", PARSE_OPT_DISABLED);
1230 	set_option_flag(record_options, 'g', NULL, PARSE_OPT_DISABLED);
1231 	set_option_flag(record_options, 0, "call-graph", PARSE_OPT_DISABLED);
1232 	set_option_flag(record_options, 'd', "data", PARSE_OPT_DISABLED);
1233 	set_option_flag(record_options, 'T', "timestamp", PARSE_OPT_DISABLED);
1234 	set_option_flag(record_options, 'P', "period", PARSE_OPT_DISABLED);
1235 	set_option_flag(record_options, 'n', "no-samples", PARSE_OPT_DISABLED);
1236 	set_option_flag(record_options, 'N', "no-buildid-cache", PARSE_OPT_DISABLED);
1237 	set_option_flag(record_options, 'B', "no-buildid", PARSE_OPT_DISABLED);
1238 	set_option_flag(record_options, 'G', "cgroup", PARSE_OPT_DISABLED);
1239 	set_option_flag(record_options, 'b', "branch-any", PARSE_OPT_DISABLED);
1240 	set_option_flag(record_options, 'j', "branch-filter", PARSE_OPT_DISABLED);
1241 	set_option_flag(record_options, 'W', "weight", PARSE_OPT_DISABLED);
1242 	set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
1243 
1244 	record_usage = kvm_stat_record_usage;
1245 	return cmd_record(i, rec_argv);
1246 }
1247 
1248 static int
1249 kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
1250 {
1251 	const struct option kvm_events_report_options[] = {
1252 		OPT_STRING(0, "event", &kvm->report_event, "report event",
1253 			   "event for reporting: vmexit, "
1254 			   "mmio (x86 only), ioport (x86 only)"),
1255 		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1256 			    "vcpu id to report"),
1257 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1258 			    "key for sorting: sample(sort by samples number)"
1259 			    " time (sort by avg time)"),
1260 		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1261 			   "analyze events only for given process id(s)"),
1262 		OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
1263 		OPT_END()
1264 	};
1265 
1266 	const char * const kvm_events_report_usage[] = {
1267 		"perf kvm stat report [<options>]",
1268 		NULL
1269 	};
1270 
1271 	if (argc) {
1272 		argc = parse_options(argc, argv,
1273 				     kvm_events_report_options,
1274 				     kvm_events_report_usage, 0);
1275 		if (argc)
1276 			usage_with_options(kvm_events_report_usage,
1277 					   kvm_events_report_options);
1278 	}
1279 
1280 	if (!kvm->opts.target.pid)
1281 		kvm->opts.target.system_wide = true;
1282 
1283 	return kvm_events_report_vcpu(kvm);
1284 }
1285 
1286 #ifdef HAVE_TIMERFD_SUPPORT
1287 static struct evlist *kvm_live_event_list(void)
1288 {
1289 	struct evlist *evlist;
1290 	char *tp, *name, *sys;
1291 	int err = -1;
1292 	const char * const *events_tp;
1293 
1294 	evlist = evlist__new();
1295 	if (evlist == NULL)
1296 		return NULL;
1297 
1298 	for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
1299 
1300 		tp = strdup(*events_tp);
1301 		if (tp == NULL)
1302 			goto out;
1303 
1304 		/* split tracepoint into subsystem and name */
1305 		sys = tp;
1306 		name = strchr(tp, ':');
1307 		if (name == NULL) {
1308 			pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
1309 			       *events_tp);
1310 			free(tp);
1311 			goto out;
1312 		}
1313 		*name = '\0';
1314 		name++;
1315 
1316 		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
1317 			pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
1318 			free(tp);
1319 			goto out;
1320 		}
1321 
1322 		free(tp);
1323 	}
1324 
1325 	err = 0;
1326 
1327 out:
1328 	if (err) {
1329 		evlist__delete(evlist);
1330 		evlist = NULL;
1331 	}
1332 
1333 	return evlist;
1334 }
1335 
1336 static int kvm_events_live(struct perf_kvm_stat *kvm,
1337 			   int argc, const char **argv)
1338 {
1339 	char errbuf[BUFSIZ];
1340 	int err;
1341 
1342 	const struct option live_options[] = {
1343 		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1344 			"record events on existing process id"),
1345 		OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
1346 			"number of mmap data pages",
1347 			perf_evlist__parse_mmap_pages),
1348 		OPT_INCR('v', "verbose", &verbose,
1349 			"be more verbose (show counter open errors, etc)"),
1350 		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
1351 			"system-wide collection from all CPUs"),
1352 		OPT_UINTEGER('d', "display", &kvm->display_time,
1353 			"time in seconds between display updates"),
1354 		OPT_STRING(0, "event", &kvm->report_event, "report event",
1355 			"event for reporting: "
1356 			"vmexit, mmio (x86 only), ioport (x86 only)"),
1357 		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1358 			"vcpu id to report"),
1359 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1360 			"key for sorting: sample(sort by samples number)"
1361 			" time (sort by avg time)"),
1362 		OPT_U64(0, "duration", &kvm->duration,
1363 			"show events other than"
1364 			" HLT (x86 only) or Wait state (s390 only)"
1365 			" that take longer than duration usecs"),
1366 		OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1367 				"per thread proc mmap processing timeout in ms"),
1368 		OPT_END()
1369 	};
1370 	const char * const live_usage[] = {
1371 		"perf kvm stat live [<options>]",
1372 		NULL
1373 	};
1374 	struct perf_data data = {
1375 		.mode = PERF_DATA_MODE_WRITE,
1376 	};
1377 
1378 
1379 	/* event handling */
1380 	kvm->tool.sample = process_sample_event;
1381 	kvm->tool.comm   = perf_event__process_comm;
1382 	kvm->tool.exit   = perf_event__process_exit;
1383 	kvm->tool.fork   = perf_event__process_fork;
1384 	kvm->tool.lost   = process_lost_event;
1385 	kvm->tool.namespaces  = perf_event__process_namespaces;
1386 	kvm->tool.ordered_events = true;
1387 	perf_tool__fill_defaults(&kvm->tool);
1388 
1389 	/* set defaults */
1390 	kvm->display_time = 1;
1391 	kvm->opts.user_interval = 1;
1392 	kvm->opts.mmap_pages = 512;
1393 	kvm->opts.target.uses_mmap = false;
1394 	kvm->opts.target.uid_str = NULL;
1395 	kvm->opts.target.uid = UINT_MAX;
1396 
1397 	symbol__init(NULL);
1398 	disable_buildid_cache();
1399 
1400 	use_browser = 0;
1401 
1402 	if (argc) {
1403 		argc = parse_options(argc, argv, live_options,
1404 				     live_usage, 0);
1405 		if (argc)
1406 			usage_with_options(live_usage, live_options);
1407 	}
1408 
1409 	kvm->duration *= NSEC_PER_USEC;   /* convert usec to nsec */
1410 
1411 	/*
1412 	 * target related setups
1413 	 */
1414 	err = target__validate(&kvm->opts.target);
1415 	if (err) {
1416 		target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
1417 		ui__warning("%s", errbuf);
1418 	}
1419 
1420 	if (target__none(&kvm->opts.target))
1421 		kvm->opts.target.system_wide = true;
1422 
1423 
1424 	/*
1425 	 * generate the event list
1426 	 */
1427 	err = setup_kvm_events_tp(kvm);
1428 	if (err < 0) {
1429 		pr_err("Unable to setup the kvm tracepoints\n");
1430 		return err;
1431 	}
1432 
1433 	kvm->evlist = kvm_live_event_list();
1434 	if (kvm->evlist == NULL) {
1435 		err = -1;
1436 		goto out;
1437 	}
1438 
1439 	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
1440 		usage_with_options(live_usage, live_options);
1441 
1442 	/*
1443 	 * perf session
1444 	 */
1445 	kvm->session = perf_session__new(&data, false, &kvm->tool);
1446 	if (kvm->session == NULL) {
1447 		err = -1;
1448 		goto out;
1449 	}
1450 	kvm->session->evlist = kvm->evlist;
1451 	perf_session__set_id_hdr_size(kvm->session);
1452 	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
1453 	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
1454 				    kvm->evlist->core.threads, false, 1);
1455 	err = kvm_live_open_events(kvm);
1456 	if (err)
1457 		goto out;
1458 
1459 	err = kvm_events_live_report(kvm);
1460 
1461 out:
1462 	perf_session__delete(kvm->session);
1463 	kvm->session = NULL;
1464 	evlist__delete(kvm->evlist);
1465 
1466 	return err;
1467 }
1468 #endif
1469 
/*
 * Write the short "perf kvm stat" help text, listing the record, report
 * and live sub-commands, to standard output.
 */
static void print_kvm_stat_usage(void)
{
	static const char usage_text[] =
		"Usage: perf kvm stat <command>\n\n"
		"# Available commands:\n"
		"\trecord: record kvm events\n"
		"\treport: report statistical data of kvm events\n"
		"\tlive:   live reporting of statistical data of kvm events\n"
		"\nOtherwise, it is the alias of 'perf stat':\n";

	fputs(usage_text, stdout);
}
1481 
1482 static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
1483 {
1484 	struct perf_kvm_stat kvm = {
1485 		.file_name = file_name,
1486 
1487 		.trace_vcpu	= -1,
1488 		.report_event	= "vmexit",
1489 		.sort_key	= "sample",
1490 
1491 	};
1492 
1493 	if (argc == 1) {
1494 		print_kvm_stat_usage();
1495 		goto perf_stat;
1496 	}
1497 
1498 	if (!strncmp(argv[1], "rec", 3))
1499 		return kvm_events_record(&kvm, argc - 1, argv + 1);
1500 
1501 	if (!strncmp(argv[1], "rep", 3))
1502 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
1503 
1504 #ifdef HAVE_TIMERFD_SUPPORT
1505 	if (!strncmp(argv[1], "live", 4))
1506 		return kvm_events_live(&kvm, argc - 1 , argv + 1);
1507 #endif
1508 
1509 perf_stat:
1510 	return cmd_stat(argc, argv);
1511 }
1512 #endif /* HAVE_KVM_STAT_SUPPORT */
1513 
/*
 * Run "perf kvm record" by forwarding to cmd_record() with the argument
 * vector
 *
 *   record -o <file_name> <argv[1..argc-1]>
 *
 * The three leading strings are heap copies; the remaining entries borrow
 * the caller's argv pointers.
 *
 * Returns the value of cmd_record(), or -ENOMEM when the vector or one of
 * its leading strings cannot be allocated (everything allocated so far is
 * released in that case).
 */
static int __cmd_record(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	/* argv[0] is replaced by "record" and "-o <file>" adds two slots. */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("record");
	rec_argv[i++] = strdup("-o");
	rec_argv[i++] = strdup(file_name);
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		/* free(NULL) is a no-op, so release all three slots. */
		while (i > 0)
			free((void *)rec_argv[--i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv);
}
1531 
/*
 * Run "perf kvm report" by forwarding to cmd_report() with the argument
 * vector
 *
 *   report -i <file_name> <argv[1..argc-1]>
 *
 * The three leading strings are heap copies; the remaining entries borrow
 * the caller's argv pointers.
 *
 * Returns the value of cmd_report(), or -ENOMEM when the vector or one of
 * its leading strings cannot be allocated (everything allocated so far is
 * released in that case).
 */
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	/* argv[0] is replaced by "report" and "-i <file>" adds two slots. */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("report");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		/* free(NULL) is a no-op, so release all three slots. */
		while (i > 0)
			free((void *)rec_argv[--i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_report(i, rec_argv);
}
1549 
/*
 * Run "perf kvm buildid-list" by forwarding to cmd_buildid_list() with the
 * argument vector
 *
 *   buildid-list -i <file_name> <argv[1..argc-1]>
 *
 * The three leading strings are heap copies; the remaining entries borrow
 * the caller's argv pointers.
 *
 * Returns the value of cmd_buildid_list(), or -ENOMEM when the vector or
 * one of its leading strings cannot be allocated (everything allocated so
 * far is released in that case).
 */
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	/* argv[0] is replaced by the command name; "-i <file>" adds two slots. */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("buildid-list");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		/* free(NULL) is a no-op, so release all three slots. */
		while (i > 0)
			free((void *)rec_argv[--i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_buildid_list(i, rec_argv);
}
1568 
1569 int cmd_kvm(int argc, const char **argv)
1570 {
1571 	const char *file_name = NULL;
1572 	const struct option kvm_options[] = {
1573 		OPT_STRING('i', "input", &file_name, "file",
1574 			   "Input file name"),
1575 		OPT_STRING('o', "output", &file_name, "file",
1576 			   "Output file name"),
1577 		OPT_BOOLEAN(0, "guest", &perf_guest,
1578 			    "Collect guest os data"),
1579 		OPT_BOOLEAN(0, "host", &perf_host,
1580 			    "Collect host os data"),
1581 		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
1582 			   "guest mount directory under which every guest os"
1583 			   " instance has a subdir"),
1584 		OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
1585 			   "file", "file saving guest os vmlinux"),
1586 		OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
1587 			   "file", "file saving guest os /proc/kallsyms"),
1588 		OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
1589 			   "file", "file saving guest os /proc/modules"),
1590 		OPT_INCR('v', "verbose", &verbose,
1591 			    "be more verbose (show counter open errors, etc)"),
1592 		OPT_END()
1593 	};
1594 
1595 	const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
1596 						"buildid-list", "stat", NULL };
1597 	const char *kvm_usage[] = { NULL, NULL };
1598 
1599 	perf_host  = 0;
1600 	perf_guest = 1;
1601 
1602 	argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
1603 					PARSE_OPT_STOP_AT_NON_OPTION);
1604 	if (!argc)
1605 		usage_with_options(kvm_usage, kvm_options);
1606 
1607 	if (!perf_host)
1608 		perf_guest = 1;
1609 
1610 	if (!file_name) {
1611 		file_name = get_filename_for_perf_kvm();
1612 
1613 		if (!file_name) {
1614 			pr_err("Failed to allocate memory for filename\n");
1615 			return -ENOMEM;
1616 		}
1617 	}
1618 
1619 	if (!strncmp(argv[0], "rec", 3))
1620 		return __cmd_record(file_name, argc, argv);
1621 	else if (!strncmp(argv[0], "rep", 3))
1622 		return __cmd_report(file_name, argc, argv);
1623 	else if (!strncmp(argv[0], "diff", 4))
1624 		return cmd_diff(argc, argv);
1625 	else if (!strncmp(argv[0], "top", 3))
1626 		return cmd_top(argc, argv);
1627 	else if (!strncmp(argv[0], "buildid-list", 12))
1628 		return __cmd_buildid_list(file_name, argc, argv);
1629 #ifdef HAVE_KVM_STAT_SUPPORT
1630 	else if (!strncmp(argv[0], "stat", 4))
1631 		return kvm_cmd_stat(file_name, argc, argv);
1632 #endif
1633 	else
1634 		usage_with_options(kvm_usage, kvm_options);
1635 
1636 	return 0;
1637 }
1638