1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Benchmark synthesis of perf events such as at the start of a 'perf
4 * record'. Synthesis is done on the current process and the 'dummy' event
5 * handlers are invoked that support dump_trace but otherwise do nothing.
6 *
7 * Copyright 2019 Google LLC.
8 */
9 #include <errno.h>
10 #include <stdio.h>
11 #include "bench.h"
12 #include "../util/debug.h"
13 #include "../util/session.h"
14 #include "../util/stat.h"
15 #include "../util/synthetic-events.h"
16 #include "../util/target.h"
17 #include "../util/thread_map.h"
18 #include "../util/tool.h"
19 #include "../util/util.h"
20 #include <linux/atomic.h>
21 #include <linux/err.h>
22 #include <linux/time64.h>
23 #include <subcmd/parse-options.h>
24
/*
 * Bounds of the thread-count sweep done by the multi-threaded benchmark.
 * max_threads starts out as UINT_MAX, which run_multi_threaded() treats as
 * "not set" and replaces with the number of online processors.
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;

/* Number of timed rounds used to compute each reported average. */
static unsigned int single_iterations = 10000;
static unsigned int multi_iterations = 10;

/* Which benchmarks were requested on the command line. */
static bool run_st, run_mt;
31
32 static const struct option options[] = {
33 OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
34 OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
35 OPT_UINTEGER('m', "min-threads", &min_threads,
36 "Minimum number of threads in multithreaded bench"),
37 OPT_UINTEGER('M', "max-threads", &max_threads,
38 "Maximum number of threads in multithreaded bench"),
39 OPT_UINTEGER('i', "single-iterations", &single_iterations,
40 "Number of iterations used to compute single-threaded average"),
41 OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
42 "Number of iterations used to compute multi-threaded average"),
43 OPT_END()
44 };
45
/* NULL-terminated usage text passed to the option parser. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL,
};
50
51 static atomic_t event_count;
52
/*
 * Event callback handed to __machine__synthesize_threads() by the benchmark
 * loops in this file.
 *
 * Every argument is ignored; the only effect is to bump event_count by one
 * so the caller can tell how many events a round produced.
 *
 * Always returns 0.
 */
static int
process_synthesized_event(const struct perf_tool *tool __maybe_unused,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample __maybe_unused,
			  struct machine *machine __maybe_unused)
{
	atomic_inc(&event_count);

	return 0;
}
61
do_run_single_threaded(struct perf_session * session,struct perf_thread_map * threads,struct target * target,bool data_mmap)62 static int do_run_single_threaded(struct perf_session *session,
63 struct perf_thread_map *threads,
64 struct target *target, bool data_mmap)
65 {
66 const unsigned int nr_threads_synthesize = 1;
67 struct timeval start, end, diff;
68 u64 runtime_us;
69 unsigned int i;
70 double time_average, time_stddev, event_average, event_stddev;
71 int err;
72 struct stats time_stats, event_stats;
73
74 init_stats(&time_stats);
75 init_stats(&event_stats);
76
77 for (i = 0; i < single_iterations; i++) {
78 atomic_set(&event_count, 0);
79 gettimeofday(&start, NULL);
80 err = __machine__synthesize_threads(&session->machines.host,
81 NULL,
82 target, threads,
83 process_synthesized_event,
84 true, data_mmap,
85 nr_threads_synthesize);
86 if (err)
87 return err;
88
89 gettimeofday(&end, NULL);
90 timersub(&end, &start, &diff);
91 runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
92 update_stats(&time_stats, runtime_us);
93 update_stats(&event_stats, atomic_read(&event_count));
94 }
95
96 time_average = avg_stats(&time_stats);
97 time_stddev = stddev_stats(&time_stats);
98 printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
99 data_mmap ? "data " : "", time_average, time_stddev);
100
101 event_average = avg_stats(&event_stats);
102 event_stddev = stddev_stats(&event_stats);
103 printf(" Average num. events: %.3f (+- %.3f)\n",
104 event_average, event_stddev);
105
106 printf(" Average time per event %.3f usec\n",
107 time_average / event_average);
108 return 0;
109 }
110
run_single_threaded(void)111 static int run_single_threaded(void)
112 {
113 struct perf_session *session;
114 struct target target = {
115 .pid = "self",
116 };
117 struct perf_thread_map *threads;
118 struct perf_env host_env;
119 int err;
120
121 perf_set_singlethreaded();
122 perf_env__init(&host_env);
123 session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
124 /*trace_event_repipe=*/false, &host_env);
125 if (IS_ERR(session)) {
126 pr_err("Session creation failed.\n");
127 perf_env__exit(&host_env);
128 return PTR_ERR(session);
129 }
130 threads = thread_map__new_by_pid(getpid());
131 if (!threads) {
132 pr_err("Thread map creation failed.\n");
133 err = -ENOMEM;
134 goto err_out;
135 }
136
137 puts(
138 "Computing performance of single threaded perf event synthesis by\n"
139 "synthesizing events on the perf process itself:");
140
141 err = do_run_single_threaded(session, threads, &target, false);
142 if (err)
143 goto err_out;
144
145 err = do_run_single_threaded(session, threads, &target, true);
146
147 err_out:
148 if (threads)
149 perf_thread_map__put(threads);
150
151 perf_session__delete(session);
152 perf_env__exit(&host_env);
153 return err;
154 }
155
do_run_multi_threaded(struct target * target,unsigned int nr_threads_synthesize)156 static int do_run_multi_threaded(struct target *target,
157 unsigned int nr_threads_synthesize)
158 {
159 struct timeval start, end, diff;
160 u64 runtime_us;
161 unsigned int i;
162 double time_average, time_stddev, event_average, event_stddev;
163 int err = 0;
164 struct stats time_stats, event_stats;
165 struct perf_session *session;
166 struct perf_env host_env;
167
168 perf_env__init(&host_env);
169 init_stats(&time_stats);
170 init_stats(&event_stats);
171 for (i = 0; i < multi_iterations; i++) {
172 session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
173 /*trace_event_repipe=*/false, &host_env);
174 if (IS_ERR(session)) {
175 err = PTR_ERR(session);
176 goto err_out;
177 }
178 atomic_set(&event_count, 0);
179 gettimeofday(&start, NULL);
180 err = __machine__synthesize_threads(&session->machines.host,
181 NULL,
182 target, NULL,
183 process_synthesized_event,
184 true, false,
185 nr_threads_synthesize);
186 if (err) {
187 perf_session__delete(session);
188 goto err_out;
189 }
190
191 gettimeofday(&end, NULL);
192 timersub(&end, &start, &diff);
193 runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
194 update_stats(&time_stats, runtime_us);
195 update_stats(&event_stats, atomic_read(&event_count));
196 perf_session__delete(session);
197 }
198
199 time_average = avg_stats(&time_stats);
200 time_stddev = stddev_stats(&time_stats);
201 printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
202 time_average, time_stddev);
203
204 event_average = avg_stats(&event_stats);
205 event_stddev = stddev_stats(&event_stats);
206 printf(" Average num. events: %.3f (+- %.3f)\n",
207 event_average, event_stddev);
208
209 printf(" Average time per event %.3f usec\n",
210 time_average / event_average);
211 err_out:
212 perf_env__exit(&host_env);
213 return err;
214 }
215
run_multi_threaded(void)216 static int run_multi_threaded(void)
217 {
218 struct target target = {
219 .cpu_list = "0"
220 };
221 unsigned int nr_threads_synthesize;
222 int err;
223
224 if (max_threads == UINT_MAX)
225 max_threads = sysconf(_SC_NPROCESSORS_ONLN);
226
227 puts(
228 "Computing performance of multi threaded perf event synthesis by\n"
229 "synthesizing events on CPU 0:");
230
231 for (nr_threads_synthesize = min_threads;
232 nr_threads_synthesize <= max_threads;
233 nr_threads_synthesize++) {
234 if (nr_threads_synthesize == 1)
235 perf_set_singlethreaded();
236 else
237 perf_set_multithreaded();
238
239 printf(" Number of synthesis threads: %u\n",
240 nr_threads_synthesize);
241
242 err = do_run_multi_threaded(&target, nr_threads_synthesize);
243 if (err)
244 return err;
245 }
246 perf_set_singlethreaded();
247 return 0;
248 }
249
bench_synthesize(int argc,const char ** argv)250 int bench_synthesize(int argc, const char **argv)
251 {
252 int err = 0;
253
254 argc = parse_options(argc, argv, options, bench_usage, 0);
255 if (argc) {
256 usage_with_options(bench_usage, options);
257 exit(EXIT_FAILURE);
258 }
259
260 /*
261 * If neither single threaded or multi-threaded are specified, default
262 * to running just single threaded.
263 */
264 if (!run_st && !run_mt)
265 run_st = true;
266
267 if (run_st)
268 err = run_single_threaded();
269
270 if (!err && run_mt)
271 err = run_multi_threaded();
272
273 return err;
274 }
275