xref: /linux/tools/perf/bench/synthesize.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Benchmark synthesis of perf events such as at the start of a 'perf
4  * record'. Synthesis is done on the current process and the 'dummy' event
5  * handlers are invoked that support dump_trace but otherwise do nothing.
6  *
7  * Copyright 2019 Google LLC.
8  */
9 #include <errno.h>
10 #include <stdio.h>
11 #include "bench.h"
12 #include "../util/debug.h"
13 #include "../util/session.h"
14 #include "../util/stat.h"
15 #include "../util/synthetic-events.h"
16 #include "../util/target.h"
17 #include "../util/thread_map.h"
18 #include "../util/tool.h"
19 #include "../util/util.h"
20 #include <linux/atomic.h>
21 #include <linux/err.h>
22 #include <linux/time64.h>
23 #include <subcmd/parse-options.h>
24 
/* Which benchmark variants to run; set by the -s / -t options. */
static bool run_st;
static bool run_mt;

/* Iteration counts used to compute the reported averages. */
static unsigned int single_iterations = 10000;
static unsigned int multi_iterations = 10;

/*
 * Range of synthesis thread counts swept by the multi-threaded benchmark.
 * A max_threads of UINT_MAX means "not set on the command line".
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;
31 
32 static const struct option options[] = {
33 	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
34 	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
35 	OPT_UINTEGER('m', "min-threads", &min_threads,
36 		"Minimum number of threads in multithreaded bench"),
37 	OPT_UINTEGER('M', "max-threads", &max_threads,
38 		"Maximum number of threads in multithreaded bench"),
39 	OPT_UINTEGER('i', "single-iterations", &single_iterations,
40 		"Number of iterations used to compute single-threaded average"),
41 	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
42 		"Number of iterations used to compute multi-threaded average"),
43 	OPT_END()
44 };
45 
/* NULL-terminated usage strings passed to the option parser. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL,
};
50 
51 static atomic_t event_count;
52 
/*
 * Event handler given to the synthesis code. Every argument is ignored; the
 * handler only increments event_count so the benchmark can report how many
 * events one synthesis pass delivered.
 *
 * Return: always 0.
 */
static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	atomic_inc(&event_count);

	return 0;
}
61 
do_run_single_threaded(struct perf_session * session,struct perf_thread_map * threads,struct target * target,bool data_mmap)62 static int do_run_single_threaded(struct perf_session *session,
63 				struct perf_thread_map *threads,
64 				struct target *target, bool data_mmap)
65 {
66 	const unsigned int nr_threads_synthesize = 1;
67 	struct timeval start, end, diff;
68 	u64 runtime_us;
69 	unsigned int i;
70 	double time_average, time_stddev, event_average, event_stddev;
71 	int err;
72 	struct stats time_stats, event_stats;
73 
74 	init_stats(&time_stats);
75 	init_stats(&event_stats);
76 
77 	for (i = 0; i < single_iterations; i++) {
78 		atomic_set(&event_count, 0);
79 		gettimeofday(&start, NULL);
80 		err = __machine__synthesize_threads(&session->machines.host,
81 						NULL,
82 						target, threads,
83 						process_synthesized_event,
84 						true, data_mmap,
85 						nr_threads_synthesize);
86 		if (err)
87 			return err;
88 
89 		gettimeofday(&end, NULL);
90 		timersub(&end, &start, &diff);
91 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
92 		update_stats(&time_stats, runtime_us);
93 		update_stats(&event_stats, atomic_read(&event_count));
94 	}
95 
96 	time_average = avg_stats(&time_stats);
97 	time_stddev = stddev_stats(&time_stats);
98 	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
99 		data_mmap ? "data " : "", time_average, time_stddev);
100 
101 	event_average = avg_stats(&event_stats);
102 	event_stddev = stddev_stats(&event_stats);
103 	printf("  Average num. events: %.3f (+- %.3f)\n",
104 		event_average, event_stddev);
105 
106 	printf("  Average time per event %.3f usec\n",
107 		time_average / event_average);
108 	return 0;
109 }
110 
run_single_threaded(void)111 static int run_single_threaded(void)
112 {
113 	struct perf_session *session;
114 	struct target target = {
115 		.pid = "self",
116 	};
117 	struct perf_thread_map *threads;
118 	struct perf_env host_env;
119 	int err;
120 
121 	perf_set_singlethreaded();
122 	perf_env__init(&host_env);
123 	session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
124 				      /*trace_event_repipe=*/false, &host_env);
125 	if (IS_ERR(session)) {
126 		pr_err("Session creation failed.\n");
127 		perf_env__exit(&host_env);
128 		return PTR_ERR(session);
129 	}
130 	threads = thread_map__new_by_pid(getpid());
131 	if (!threads) {
132 		pr_err("Thread map creation failed.\n");
133 		err = -ENOMEM;
134 		goto err_out;
135 	}
136 
137 	puts(
138 "Computing performance of single threaded perf event synthesis by\n"
139 "synthesizing events on the perf process itself:");
140 
141 	err = do_run_single_threaded(session, threads, &target, false);
142 	if (err)
143 		goto err_out;
144 
145 	err = do_run_single_threaded(session, threads, &target, true);
146 
147 err_out:
148 	if (threads)
149 		perf_thread_map__put(threads);
150 
151 	perf_session__delete(session);
152 	perf_env__exit(&host_env);
153 	return err;
154 }
155 
do_run_multi_threaded(struct target * target,unsigned int nr_threads_synthesize)156 static int do_run_multi_threaded(struct target *target,
157 				unsigned int nr_threads_synthesize)
158 {
159 	struct timeval start, end, diff;
160 	u64 runtime_us;
161 	unsigned int i;
162 	double time_average, time_stddev, event_average, event_stddev;
163 	int err = 0;
164 	struct stats time_stats, event_stats;
165 	struct perf_session *session;
166 	struct perf_env host_env;
167 
168 	perf_env__init(&host_env);
169 	init_stats(&time_stats);
170 	init_stats(&event_stats);
171 	for (i = 0; i < multi_iterations; i++) {
172 		session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
173 					      /*trace_event_repipe=*/false, &host_env);
174 		if (IS_ERR(session)) {
175 			err = PTR_ERR(session);
176 			goto err_out;
177 		}
178 		atomic_set(&event_count, 0);
179 		gettimeofday(&start, NULL);
180 		err = __machine__synthesize_threads(&session->machines.host,
181 						NULL,
182 						target, NULL,
183 						process_synthesized_event,
184 						true, false,
185 						nr_threads_synthesize);
186 		if (err) {
187 			perf_session__delete(session);
188 			goto err_out;
189 		}
190 
191 		gettimeofday(&end, NULL);
192 		timersub(&end, &start, &diff);
193 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
194 		update_stats(&time_stats, runtime_us);
195 		update_stats(&event_stats, atomic_read(&event_count));
196 		perf_session__delete(session);
197 	}
198 
199 	time_average = avg_stats(&time_stats);
200 	time_stddev = stddev_stats(&time_stats);
201 	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
202 		time_average, time_stddev);
203 
204 	event_average = avg_stats(&event_stats);
205 	event_stddev = stddev_stats(&event_stats);
206 	printf("    Average num. events: %.3f (+- %.3f)\n",
207 		event_average, event_stddev);
208 
209 	printf("    Average time per event %.3f usec\n",
210 		time_average / event_average);
211 err_out:
212 	perf_env__exit(&host_env);
213 	return err;
214 }
215 
run_multi_threaded(void)216 static int run_multi_threaded(void)
217 {
218 	struct target target = {
219 		.cpu_list = "0"
220 	};
221 	unsigned int nr_threads_synthesize;
222 	int err;
223 
224 	if (max_threads == UINT_MAX)
225 		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
226 
227 	puts(
228 "Computing performance of multi threaded perf event synthesis by\n"
229 "synthesizing events on CPU 0:");
230 
231 	for (nr_threads_synthesize = min_threads;
232 	     nr_threads_synthesize <= max_threads;
233 	     nr_threads_synthesize++) {
234 		if (nr_threads_synthesize == 1)
235 			perf_set_singlethreaded();
236 		else
237 			perf_set_multithreaded();
238 
239 		printf("  Number of synthesis threads: %u\n",
240 			nr_threads_synthesize);
241 
242 		err = do_run_multi_threaded(&target, nr_threads_synthesize);
243 		if (err)
244 			return err;
245 	}
246 	perf_set_singlethreaded();
247 	return 0;
248 }
249 
bench_synthesize(int argc,const char ** argv)250 int bench_synthesize(int argc, const char **argv)
251 {
252 	int err = 0;
253 
254 	argc = parse_options(argc, argv, options, bench_usage, 0);
255 	if (argc) {
256 		usage_with_options(bench_usage, options);
257 		exit(EXIT_FAILURE);
258 	}
259 
260 	/*
261 	 * If neither single threaded or multi-threaded are specified, default
262 	 * to running just single threaded.
263 	 */
264 	if (!run_st && !run_mt)
265 		run_st = true;
266 
267 	if (run_st)
268 		err = run_single_threaded();
269 
270 	if (!err && run_mt)
271 		err = run_multi_threaded();
272 
273 	return err;
274 }
275