1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Benchmark synthesis of perf events such as at the start of a 'perf 4 * record'. Synthesis is done on the current process and the 'dummy' event 5 * handlers are invoked that support dump_trace but otherwise do nothing. 6 * 7 * Copyright 2019 Google LLC. 8 */ 9 #include <errno.h> 10 #include <stdio.h> 11 #include "bench.h" 12 #include "../util/debug.h" 13 #include "../util/session.h" 14 #include "../util/stat.h" 15 #include "../util/synthetic-events.h" 16 #include "../util/target.h" 17 #include "../util/thread_map.h" 18 #include "../util/tool.h" 19 #include "../util/util.h" 20 #include <linux/atomic.h> 21 #include <linux/err.h> 22 #include <linux/time64.h> 23 #include <subcmd/parse-options.h> 24 25 static unsigned int min_threads = 1; 26 static unsigned int max_threads = UINT_MAX; 27 static unsigned int single_iterations = 10000; 28 static unsigned int multi_iterations = 10; 29 static bool run_st; 30 static bool run_mt; 31 32 static const struct option options[] = { 33 OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"), 34 OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"), 35 OPT_UINTEGER('m', "min-threads", &min_threads, 36 "Minimum number of threads in multithreaded bench"), 37 OPT_UINTEGER('M', "max-threads", &max_threads, 38 "Maximum number of threads in multithreaded bench"), 39 OPT_UINTEGER('i', "single-iterations", &single_iterations, 40 "Number of iterations used to compute single-threaded average"), 41 OPT_UINTEGER('I', "multi-iterations", &multi_iterations, 42 "Number of iterations used to compute multi-threaded average"), 43 OPT_END() 44 }; 45 46 static const char *const bench_usage[] = { 47 "perf bench internals synthesize <options>", 48 NULL 49 }; 50 51 static atomic_t event_count; 52 53 static int process_synthesized_event(const struct perf_tool *tool __maybe_unused, 54 union perf_event *event __maybe_unused, 55 struct perf_sample *sample __maybe_unused, 56 struct machine *machine __maybe_unused) 57 { 58 atomic_inc(&event_count); 59 return 0; 60 } 61 62 static int do_run_single_threaded(struct perf_session *session, 63 struct perf_thread_map *threads, 64 struct target *target, bool data_mmap) 65 { 66 const unsigned int nr_threads_synthesize = 1; 67 struct timeval start, end, diff; 68 u64 runtime_us; 69 unsigned int i; 70 double time_average, time_stddev, event_average, event_stddev; 71 int err; 72 struct stats time_stats, event_stats; 73 74 init_stats(&time_stats); 75 init_stats(&event_stats); 76 77 for (i = 0; i < single_iterations; i++) { 78 atomic_set(&event_count, 0); 79 gettimeofday(&start, NULL); 80 err = __machine__synthesize_threads(&session->machines.host, 81 NULL, 82 target, threads, 83 process_synthesized_event, 84 true, data_mmap, 85 nr_threads_synthesize); 86 if (err) 87 return err; 88 89 gettimeofday(&end, NULL); 90 timersub(&end, &start, &diff); 91 runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 92 update_stats(&time_stats, runtime_us); 93 update_stats(&event_stats, atomic_read(&event_count)); 94 } 95 96 time_average = avg_stats(&time_stats); 97 time_stddev = stddev_stats(&time_stats); 98 printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n", 99 data_mmap ? "data " : "", time_average, time_stddev); 100 101 event_average = avg_stats(&event_stats); 102 event_stddev = stddev_stats(&event_stats); 103 printf(" Average num. events: %.3f (+- %.3f)\n", 104 event_average, event_stddev); 105 106 printf(" Average time per event %.3f usec\n", 107 time_average / event_average); 108 return 0; 109 } 110 111 static int run_single_threaded(void) 112 { 113 struct perf_session *session; 114 struct target target = { 115 .pid = "self", 116 }; 117 struct perf_thread_map *threads; 118 struct perf_env host_env; 119 int err; 120 121 perf_set_singlethreaded(); 122 perf_env__init(&host_env); 123 session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, 124 /*trace_event_repipe=*/false, &host_env); 125 if (IS_ERR(session)) { 126 pr_err("Session creation failed.\n"); 127 perf_env__exit(&host_env); 128 return PTR_ERR(session); 129 } 130 threads = thread_map__new_by_pid(getpid()); 131 if (!threads) { 132 pr_err("Thread map creation failed.\n"); 133 err = -ENOMEM; 134 goto err_out; 135 } 136 137 puts( 138 "Computing performance of single threaded perf event synthesis by\n" 139 "synthesizing events on the perf process itself:"); 140 141 err = do_run_single_threaded(session, threads, &target, false); 142 if (err) 143 goto err_out; 144 145 err = do_run_single_threaded(session, threads, &target, true); 146 147 err_out: 148 if (threads) 149 perf_thread_map__put(threads); 150 151 perf_session__delete(session); 152 perf_env__exit(&host_env); 153 return err; 154 } 155 156 static int do_run_multi_threaded(struct target *target, 157 unsigned int nr_threads_synthesize) 158 { 159 struct timeval start, end, diff; 160 u64 runtime_us; 161 unsigned int i; 162 double time_average, time_stddev, event_average, event_stddev; 163 int err = 0; 164 struct stats time_stats, event_stats; 165 struct perf_session *session; 166 struct perf_env host_env; 167 168 perf_env__init(&host_env); 169 init_stats(&time_stats); 170 init_stats(&event_stats); 171 for (i = 0; i < multi_iterations; i++) { 172 session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, 173 /*trace_event_repipe=*/false, &host_env); 174 if (IS_ERR(session)) { 175 err = PTR_ERR(session); 176 goto err_out; 177 } 178 atomic_set(&event_count, 0); 179 gettimeofday(&start, NULL); 180 err = __machine__synthesize_threads(&session->machines.host, 181 NULL, 182 target, NULL, 183 process_synthesized_event, 184 true, false, 185 nr_threads_synthesize); 186 if (err) { 187 perf_session__delete(session); 188 goto err_out; 189 } 190 191 gettimeofday(&end, NULL); 192 timersub(&end, &start, &diff); 193 runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 194 update_stats(&time_stats, runtime_us); 195 update_stats(&event_stats, atomic_read(&event_count)); 196 perf_session__delete(session); 197 } 198 199 time_average = avg_stats(&time_stats); 200 time_stddev = stddev_stats(&time_stats); 201 printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n", 202 time_average, time_stddev); 203 204 event_average = avg_stats(&event_stats); 205 event_stddev = stddev_stats(&event_stats); 206 printf(" Average num. events: %.3f (+- %.3f)\n", 207 event_average, event_stddev); 208 209 printf(" Average time per event %.3f usec\n", 210 time_average / event_average); 211 err_out: 212 perf_env__exit(&host_env); 213 return err; 214 } 215 216 static int run_multi_threaded(void) 217 { 218 struct target target = { 219 .cpu_list = "0" 220 }; 221 unsigned int nr_threads_synthesize; 222 int err; 223 224 if (max_threads == UINT_MAX) 225 max_threads = sysconf(_SC_NPROCESSORS_ONLN); 226 227 puts( 228 "Computing performance of multi threaded perf event synthesis by\n" 229 "synthesizing events on CPU 0:"); 230 231 for (nr_threads_synthesize = min_threads; 232 nr_threads_synthesize <= max_threads; 233 nr_threads_synthesize++) { 234 if (nr_threads_synthesize == 1) 235 perf_set_singlethreaded(); 236 else 237 perf_set_multithreaded(); 238 239 printf(" Number of synthesis threads: %u\n", 240 nr_threads_synthesize); 241 242 err = do_run_multi_threaded(&target, nr_threads_synthesize); 243 if (err) 244 return err; 245 } 246 perf_set_singlethreaded(); 247 return 0; 248 } 249 250 int bench_synthesize(int argc, const char **argv) 251 { 252 int err = 0; 253 254 argc = parse_options(argc, argv, options, bench_usage, 0); 255 if (argc) { 256 usage_with_options(bench_usage, options); 257 exit(EXIT_FAILURE); 258 } 259 260 /* 261 * If neither single threaded or multi-threaded are specified, default 262 * to running just single threaded. 263 */ 264 if (!run_st && !run_mt) 265 run_st = true; 266 267 if (run_st) 268 err = run_single_threaded(); 269 270 if (!err && run_mt) 271 err = run_multi_threaded(); 272 273 return err; 274 } 275