xref: /linux/tools/perf/bench/synthesize.c (revision 989fe6771266bdb82a815d78802c5aa7c918fdfd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Benchmark synthesis of perf events such as at the start of a 'perf
4  * record'. Synthesis is done on the current process and the 'dummy' event
5  * handlers are invoked that support dump_trace but otherwise do nothing.
6  *
7  * Copyright 2019 Google LLC.
8  */
9 #include <stdio.h>
10 #include "bench.h"
11 #include "../util/debug.h"
12 #include "../util/session.h"
13 #include "../util/stat.h"
14 #include "../util/synthetic-events.h"
15 #include "../util/target.h"
16 #include "../util/thread_map.h"
17 #include "../util/tool.h"
18 #include "../util/util.h"
19 #include <linux/atomic.h>
20 #include <linux/err.h>
21 #include <linux/time64.h>
22 #include <subcmd/parse-options.h>
23 
/* Which benchmark variants to run; both are set from the command line. */
static bool run_st;
static bool run_mt;

/*
 * Inclusive range of synthesis thread counts swept by the multi-threaded
 * benchmark. A max_threads of UINT_MAX is replaced by the number of online
 * CPUs when that benchmark starts.
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;

/* Number of timed repetitions behind each reported average. */
static unsigned int single_iterations = 10000;
static unsigned int multi_iterations = 10;
30 
31 static const struct option options[] = {
32 	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
33 	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
34 	OPT_UINTEGER('m', "min-threads", &min_threads,
35 		"Minimum number of threads in multithreaded bench"),
36 	OPT_UINTEGER('M', "max-threads", &max_threads,
37 		"Maximum number of threads in multithreaded bench"),
38 	OPT_UINTEGER('i', "single-iterations", &single_iterations,
39 		"Number of iterations used to compute single-threaded average"),
40 	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
41 		"Number of iterations used to compute multi-threaded average"),
42 	OPT_END()
43 };
44 
/* NULL-terminated usage text shown together with the option list. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL,
};
49 
50 static atomic_t event_count;
51 
52 static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
53 				     union perf_event *event __maybe_unused,
54 				     struct perf_sample *sample __maybe_unused,
55 				     struct machine *machine __maybe_unused)
56 {
57 	atomic_inc(&event_count);
58 	return 0;
59 }
60 
61 static int do_run_single_threaded(struct perf_session *session,
62 				struct perf_thread_map *threads,
63 				struct target *target, bool data_mmap)
64 {
65 	const unsigned int nr_threads_synthesize = 1;
66 	struct timeval start, end, diff;
67 	u64 runtime_us;
68 	unsigned int i;
69 	double time_average, time_stddev, event_average, event_stddev;
70 	int err;
71 	struct stats time_stats, event_stats;
72 
73 	init_stats(&time_stats);
74 	init_stats(&event_stats);
75 
76 	for (i = 0; i < single_iterations; i++) {
77 		atomic_set(&event_count, 0);
78 		gettimeofday(&start, NULL);
79 		err = __machine__synthesize_threads(&session->machines.host,
80 						NULL,
81 						target, threads,
82 						process_synthesized_event,
83 						true, data_mmap,
84 						nr_threads_synthesize);
85 		if (err)
86 			return err;
87 
88 		gettimeofday(&end, NULL);
89 		timersub(&end, &start, &diff);
90 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
91 		update_stats(&time_stats, runtime_us);
92 		update_stats(&event_stats, atomic_read(&event_count));
93 	}
94 
95 	time_average = avg_stats(&time_stats);
96 	time_stddev = stddev_stats(&time_stats);
97 	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
98 		data_mmap ? "data " : "", time_average, time_stddev);
99 
100 	event_average = avg_stats(&event_stats);
101 	event_stddev = stddev_stats(&event_stats);
102 	printf("  Average num. events: %.3f (+- %.3f)\n",
103 		event_average, event_stddev);
104 
105 	printf("  Average time per event %.3f usec\n",
106 		time_average / event_average);
107 	return 0;
108 }
109 
110 static int run_single_threaded(void)
111 {
112 	struct perf_session *session;
113 	struct target target = {
114 		.pid = "self",
115 	};
116 	struct perf_thread_map *threads;
117 	struct perf_env host_env;
118 	int err;
119 
120 	perf_set_singlethreaded();
121 	perf_env__init(&host_env);
122 	session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
123 				      /*trace_event_repipe=*/false, &host_env);
124 	if (IS_ERR(session)) {
125 		pr_err("Session creation failed.\n");
126 		perf_env__exit(&host_env);
127 		return PTR_ERR(session);
128 	}
129 	threads = thread_map__new_by_pid(getpid());
130 	if (!threads) {
131 		pr_err("Thread map creation failed.\n");
132 		err = -ENOMEM;
133 		goto err_out;
134 	}
135 
136 	puts(
137 "Computing performance of single threaded perf event synthesis by\n"
138 "synthesizing events on the perf process itself:");
139 
140 	err = do_run_single_threaded(session, threads, &target, false);
141 	if (err)
142 		goto err_out;
143 
144 	err = do_run_single_threaded(session, threads, &target, true);
145 
146 err_out:
147 	if (threads)
148 		perf_thread_map__put(threads);
149 
150 	perf_session__delete(session);
151 	perf_env__exit(&host_env);
152 	return err;
153 }
154 
155 static int do_run_multi_threaded(struct target *target,
156 				unsigned int nr_threads_synthesize)
157 {
158 	struct timeval start, end, diff;
159 	u64 runtime_us;
160 	unsigned int i;
161 	double time_average, time_stddev, event_average, event_stddev;
162 	int err = 0;
163 	struct stats time_stats, event_stats;
164 	struct perf_session *session;
165 	struct perf_env host_env;
166 
167 	perf_env__init(&host_env);
168 	init_stats(&time_stats);
169 	init_stats(&event_stats);
170 	for (i = 0; i < multi_iterations; i++) {
171 		session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
172 					      /*trace_event_repipe=*/false, &host_env);
173 		if (IS_ERR(session)) {
174 			err = PTR_ERR(session);
175 			goto err_out;
176 		}
177 		atomic_set(&event_count, 0);
178 		gettimeofday(&start, NULL);
179 		err = __machine__synthesize_threads(&session->machines.host,
180 						NULL,
181 						target, NULL,
182 						process_synthesized_event,
183 						true, false,
184 						nr_threads_synthesize);
185 		if (err) {
186 			perf_session__delete(session);
187 			goto err_out;
188 		}
189 
190 		gettimeofday(&end, NULL);
191 		timersub(&end, &start, &diff);
192 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
193 		update_stats(&time_stats, runtime_us);
194 		update_stats(&event_stats, atomic_read(&event_count));
195 		perf_session__delete(session);
196 	}
197 
198 	time_average = avg_stats(&time_stats);
199 	time_stddev = stddev_stats(&time_stats);
200 	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
201 		time_average, time_stddev);
202 
203 	event_average = avg_stats(&event_stats);
204 	event_stddev = stddev_stats(&event_stats);
205 	printf("    Average num. events: %.3f (+- %.3f)\n",
206 		event_average, event_stddev);
207 
208 	printf("    Average time per event %.3f usec\n",
209 		time_average / event_average);
210 err_out:
211 	perf_env__exit(&host_env);
212 	return err;
213 }
214 
215 static int run_multi_threaded(void)
216 {
217 	struct target target = {
218 		.cpu_list = "0"
219 	};
220 	unsigned int nr_threads_synthesize;
221 	int err;
222 
223 	if (max_threads == UINT_MAX)
224 		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
225 
226 	puts(
227 "Computing performance of multi threaded perf event synthesis by\n"
228 "synthesizing events on CPU 0:");
229 
230 	for (nr_threads_synthesize = min_threads;
231 	     nr_threads_synthesize <= max_threads;
232 	     nr_threads_synthesize++) {
233 		if (nr_threads_synthesize == 1)
234 			perf_set_singlethreaded();
235 		else
236 			perf_set_multithreaded();
237 
238 		printf("  Number of synthesis threads: %u\n",
239 			nr_threads_synthesize);
240 
241 		err = do_run_multi_threaded(&target, nr_threads_synthesize);
242 		if (err)
243 			return err;
244 	}
245 	perf_set_singlethreaded();
246 	return 0;
247 }
248 
249 int bench_synthesize(int argc, const char **argv)
250 {
251 	int err = 0;
252 
253 	argc = parse_options(argc, argv, options, bench_usage, 0);
254 	if (argc) {
255 		usage_with_options(bench_usage, options);
256 		exit(EXIT_FAILURE);
257 	}
258 
259 	/*
260 	 * If neither single threaded or multi-threaded are specified, default
261 	 * to running just single threaded.
262 	 */
263 	if (!run_st && !run_mt)
264 		run_st = true;
265 
266 	if (run_st)
267 		err = run_single_threaded();
268 
269 	if (!err && run_mt)
270 		err = run_multi_threaded();
271 
272 	return err;
273 }
274