// SPDX-License-Identifier: GPL-2.0-only
/*
 * Benchmarking code execution time inside the kernel
 *
 * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/time.h>

#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */

/* For concurrency testing */
#include <linux/completion.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "time_bench.h"

static int verbose = 1;

/** TSC (Time-Stamp Counter) based **
 * See: linux/time_bench.h
 *  tsc_start_clock() and tsc_stop_clock()
 */

/** Wall-clock based **
 */

/** PMU (Performance Monitor Unit) based **
 */
#define PERF_FORMAT \
	(PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
	 PERF_FORMAT_TOTAL_TIME_RUNNING)

struct raw_perf_event {
	uint64_t config;  /* event */
	uint64_t config1; /* umask */
	struct perf_event *save;
	char *desc;
};

/* If HT is enabled, a maximum of 4 events (5 if one is instructions
 * retired) can be specified; if HT is disabled, a maximum of 8 (9 if
 * one is instructions retired) can be specified.
 *
 * From Table 19-1. Architectural Performance Events
 * Intel Architectures Software Developer's Manual Volume 3: System
 * Programming Guide
 */
struct raw_perf_event perf_events[] = {
	{ 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
	{ 0xc0, 0x00, NULL, "Instruction Retired" }
};

#define NUM_EVTS (ARRAY_SIZE(perf_events))

/* WARNING: PMU config is currently broken!
 */
bool time_bench_PMU_config(bool enable)
{
	int i;
	struct perf_event_attr perf_conf;
	struct perf_event *perf_event;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	pr_info("DEBUG: cpu:%d\n", cpu);
	preempt_enable();

	memset(&perf_conf, 0, sizeof(struct perf_event_attr));
	perf_conf.type = PERF_TYPE_RAW;
	perf_conf.size = sizeof(struct perf_event_attr);
	perf_conf.read_format = PERF_FORMAT;
	perf_conf.pinned = 1;
	perf_conf.exclude_user = 1;   /* No userspace events */
	perf_conf.exclude_kernel = 0; /* Only kernel events */

	for (i = 0; i < NUM_EVTS; i++) {
		perf_conf.disabled = enable;
		//perf_conf.disabled = (i == 0) ? 1 : 0;
		perf_conf.config = perf_events[i].config;
		perf_conf.config1 = perf_events[i].config1;
		if (verbose)
			pr_info("%s() enable PMU counter: %s\n",
				__func__, perf_events[i].desc);
		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
							      NULL /* task */,
							      NULL /* overflow_handler */,
							      NULL /* context */);
		if (!IS_ERR(perf_event)) {
			perf_events[i].save = perf_event;
			pr_info("%s():DEBUG perf_event success\n", __func__);

			perf_event_enable(perf_event);
		} else {
			/* Create returns an ERR_PTR on failure, never NULL */
			pr_info("%s():DEBUG perf_event create failed: %ld\n",
				__func__, PTR_ERR(perf_event));
		}
	}

	return true;
}

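/* Illustrative sketch (not wired into the build, hence #if 0): how the
 * counters created by time_bench_PMU_config() could be read back and
 * released.  The helper name below is hypothetical; perf_event_read_value(),
 * perf_event_disable() and perf_event_release_kernel() are the in-kernel
 * interfaces for kernel-created counters.
 */
#if 0
static void time_bench_PMU_release_example(void)
{
	u64 enabled, running, value;
	int i;

	for (i = 0; i < NUM_EVTS; i++) {
		struct perf_event *event = perf_events[i].save;

		if (!event)
			continue;
		/* Read the accumulated counter value before tearing down */
		value = perf_event_read_value(event, &enabled, &running);
		pr_info("PMU counter \"%s\": value=%llu (enabled=%llu running=%llu)\n",
			perf_events[i].desc, value, enabled, running);
		perf_event_disable(event);
		perf_event_release_kernel(event);
		perf_events[i].save = NULL;
	}
}
#endif
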
/** Generic functions **
 */

/* Calculate stats, store results in record */
bool time_bench_calc_stats(struct time_bench_record *rec)
{
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
	uint64_t ns_per_call_tmp_rem = 0;
	uint32_t ns_per_call_remainder = 0;
	uint64_t pmc_ipc_tmp_rem = 0;
	uint32_t pmc_ipc_remainder = 0;
	uint32_t pmc_ipc_div = 0;
	uint32_t invoked_cnt_precision = 0;
	uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */

	if (rec->flags & TIME_BENCH_LOOP) {
		if (rec->invoked_cnt < 1000) {
			pr_err("ERR: need more (>1000) loops (%llu) for timing\n",
			       rec->invoked_cnt);
			return false;
		}
		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
			/* div_u64_rem() only supports a 32-bit divisor */
			pr_err("ERR: invoke cnt (%llu) overflows 32-bit\n",
			       rec->invoked_cnt);
			return false;
		}
		invoked_cnt = (uint32_t)rec->invoked_cnt;
	}

	/* TSC (Time-Stamp Counter) records */
	if (rec->flags & TIME_BENCH_TSC) {
		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
		if (rec->tsc_interval == 0) {
			pr_err("ABORT: timing took ZERO TSC time\n");
			return false;
		}
		/* Calculate stats */
		if (rec->flags & TIME_BENCH_LOOP)
			rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
		else
			rec->tsc_cycles = rec->tsc_interval;
	}

	/* Wall-clock time calc */
	if (rec->flags & TIME_BENCH_WALLCLOCK) {
		rec->time_start = rec->ts_start.tv_nsec +
				  (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
		rec->time_stop = rec->ts_stop.tv_nsec +
				 (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
		rec->time_interval = rec->time_stop - rec->time_start;
		if (rec->time_interval == 0) {
			pr_err("ABORT: timing took ZERO wallclock time\n");
			return false;
		}
		/* Calculate stats */
		/*** Division in the kernel is tricky ***/
		/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
		/* Remainder only correct because NANOSEC_PER_SEC is 10^9 */
		rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
					    &rec->time_sec_remainder);
		//TODO: use existing struct timespec records instead of div?

		if (rec->flags & TIME_BENCH_LOOP) {
			/*** Division in the kernel is tricky ***/
			/* Orig: ns = ((double)time_interval / invoked_cnt); */
			/* First get the quotient */
			rec->ns_per_call_quotient =
				div_u64_rem(rec->time_interval, invoked_cnt,
					    &ns_per_call_remainder);
			/* Now get decimals with .xxx precision (truncated, not rounded) */
			ns_per_call_tmp_rem = ns_per_call_remainder;
			invoked_cnt_precision = invoked_cnt / 1000;
			if (invoked_cnt_precision > 0) {
				rec->ns_per_call_decimal =
					div_u64_rem(ns_per_call_tmp_rem,
						    invoked_cnt_precision,
						    &ns_per_call_remainder);
			}
		}
	}

	/* Performance Monitor Unit (PMU) counters */
	if (rec->flags & TIME_BENCH_PMU) {
		//FIXME: Overflow handling???
		rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
		rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;

		/* Calc Instructions Per Cycle (IPC) */
		/* First get the quotient */
		rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
						    &pmc_ipc_remainder);
		/* Now get decimals with .xxx precision (truncated, not rounded) */
		pmc_ipc_tmp_rem = pmc_ipc_remainder;
		pmc_ipc_div = rec->pmc_clk / 1000;
		if (pmc_ipc_div > 0) {
			rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
							   pmc_ipc_div,
							   &pmc_ipc_remainder);
		}
	}

	return true;
}

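/* Worked example of the fixed-point trick used above (comment-only
 * illustration, the numbers are made up):
 *
 *   time_interval = 2345678 ns, invoked_cnt = 100000 calls
 *   quotient  = div_u64_rem(2345678, 100000, &rem) = 23   (rem = 45678)
 *   precision = invoked_cnt / 1000                 = 100
 *   decimal   = div_u64_rem(45678, 100, &rem)      = 456
 *
 * Printed with "%llu.%03llu" this gives "23.456 ns", i.e. the exact value
 * 23.45678 ns truncated (not rounded) to three decimal places.
 */
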
/* Generic function for invoking a loop function and calculating
 * execution time stats.  The function being called/timed is assumed
 * to perform a tight loop, and to update the timing record struct.
 */
bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
		     int (*func)(struct time_bench_record *record, void *data))
{
	struct time_bench_record rec;

	/* Setup record */
	memset(&rec, 0, sizeof(rec)); /* zero, as func might not update all */
	rec.version_abi = 1;
	rec.loops = loops;
	rec.step = step;
	rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);

	/*** Loop function being timed ***/
	if (!func(&rec, data)) {
		pr_err("ABORT: function being timed failed\n");
		return false;
	}

	if (rec.invoked_cnt < loops)
		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%u)\n",
			rec.invoked_cnt, loops);

	/* Calculate stats */
	if (!time_bench_calc_stats(&rec)) {
		pr_err("ABORT: could not calculate stats\n");
		return false;
	}

	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
		rec.ns_per_call_decimal, rec.step, rec.time_sec,
		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
		rec.tsc_interval);
	if (rec.flags & TIME_BENCH_PMU)
		pr_info("Type:%s PMU inst/clock %llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
			rec.pmc_ipc_decimal);
	return true;
}

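/* Usage sketch for time_bench_loop() (illustration only, kept out of the
 * build): a loop function measures its own tight loop and reports the
 * invocation count back via the record.  It assumes time_bench_start() and
 * time_bench_stop() helpers from time_bench.h record the start/stop
 * timestamps and the invoked count; both the helper names/signatures and
 * the example function name are assumptions.
 */
#if 0
static int time_bench_example_loop(struct time_bench_record *rec, void *data)
{
	uint64_t loops_cnt = 0;
	uint32_t i;

	time_bench_start(rec);
	for (i = 0; i < rec->loops; i++) {
		loops_cnt++;
		barrier(); /* keep the compiler from optimizing the loop away */
	}
	time_bench_stop(rec, loops_cnt);
	return loops_cnt; /* non-zero means success */
}

/* Example invocation, e.g. from a module init function:
 *	time_bench_loop(10000000, 0, "example", NULL, time_bench_example_loop);
 */
#endif
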
/* Function getting invoked by kthread */
static int invoke_test_on_cpu_func(void *private)
{
	struct time_bench_cpu *cpu = private;
	struct time_bench_sync *sync = cpu->sync;
	cpumask_t newmask = CPU_MASK_NONE;
	void *data = cpu->data;

	/* Restrict CPU */
	cpumask_set_cpu(cpu->rec.cpu, &newmask);
	set_cpus_allowed_ptr(current, &newmask);

	/* Synchronize start of concurrency test */
	atomic_inc(&sync->nr_tests_running);
	wait_for_completion(&sync->start_event);

	/* Start benchmark function */
	if (!cpu->bench_func(&cpu->rec, data)) {
		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
		       cpu->rec.cpu, smp_processor_id());
	} else {
		if (verbose)
			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
				smp_processor_id());
	}
	cpu->did_bench_run = true;

	/* End test */
	atomic_dec(&sync->nr_tests_running);
	/* Wait for kthread_stop() telling us to stop */
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

void time_bench_print_stats_cpumask(const char *desc,
				    struct time_bench_cpu *cpu_tasks,
				    const struct cpumask *mask)
{
	uint64_t average = 0;
	int cpu;
	int step = 0;
	struct sum {
		uint64_t tsc_cycles;
		int records;
	} sum = { 0 };

	/* Get stats */
	for_each_cpu(cpu, mask) {
		struct time_bench_cpu *c = &cpu_tasks[cpu];
		struct time_bench_record *rec = &c->rec;

		/* Calculate stats */
		time_bench_calc_stats(rec);

		pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
			desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
			rec->ns_per_call_decimal, rec->step, rec->time_sec,
			rec->time_sec_remainder, rec->time_interval,
			rec->invoked_cnt, rec->tsc_interval);

		/* Collect average */
		sum.records++;
		sum.tsc_cycles += rec->tsc_cycles;
		step = rec->step;
	}

	if (sum.records) /* avoid div-by-zero */
		average = sum.tsc_cycles / sum.records;
	pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
		average, sum.records, step);
}

void time_bench_run_concurrent(uint32_t loops, int step, void *data,
			       const struct cpumask *mask, /* Support masking out some CPUs */
			       struct time_bench_sync *sync,
			       struct time_bench_cpu *cpu_tasks,
			       int (*func)(struct time_bench_record *record,
					   void *data))
{
	int cpu, running = 0;

	if (verbose) // DEBUG
		pr_warn("%s() Started on CPU:%d\n", __func__,
			smp_processor_id());

	/* Reset sync conditions */
	atomic_set(&sync->nr_tests_running, 0);
	init_completion(&sync->start_event);

	/* Spawn off jobs on all CPUs */
	for_each_cpu(cpu, mask) {
		struct time_bench_cpu *c = &cpu_tasks[cpu];

		running++;
		c->sync = sync; /* Send sync variable along */
		c->data = data; /* Send opaque data along */

		/* Init benchmark record */
		memset(&c->rec, 0, sizeof(struct time_bench_record));
		c->rec.version_abi = 1;
		c->rec.loops = loops;
		c->rec.step = step;
		c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
				TIME_BENCH_WALLCLOCK);
		c->rec.cpu = cpu;
		c->bench_func = func;
		c->task = kthread_run(invoke_test_on_cpu_func, c,
				      "time_bench%d", cpu);
		if (IS_ERR(c->task)) {
			pr_err("%s(): Failed to start test func\n", __func__);
			return; /* Argh, what about cleanup?! */
		}
	}

	/* Wait until all processes are running */
	while (atomic_read(&sync->nr_tests_running) < running) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(10);
	}
	/* Kick off all CPUs concurrently on completion event */
	complete_all(&sync->start_event);

	/* Wait for CPUs to finish */
	while (atomic_read(&sync->nr_tests_running)) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(10);
	}

	/* Stop the kthreads */
	for_each_cpu(cpu, mask) {
		struct time_bench_cpu *c = &cpu_tasks[cpu];

		kthread_stop(c->task);
	}

	if (verbose) // DEBUG - happens often, finishes on another CPU
		pr_warn("%s() Finished on CPU:%d\n", __func__,
			smp_processor_id());
}
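
/* Usage sketch for the concurrent API (illustration only, kept out of the
 * build): run the same loop function on every online CPU and print the
 * per-CPU and averaged results.  It reuses the hypothetical
 * time_bench_example_loop() from the earlier sketch, assumes struct
 * time_bench_sync/time_bench_cpu are defined in time_bench.h as used by
 * the functions above, and kcalloc() would additionally need
 * <linux/slab.h>.
 */
#if 0
static void time_bench_example_concurrent(void)
{
	struct time_bench_sync sync;
	struct time_bench_cpu *cpu_tasks;

	/* One record/task slot per possible CPU id */
	cpu_tasks = kcalloc(nr_cpu_ids, sizeof(*cpu_tasks), GFP_KERNEL);
	if (!cpu_tasks)
		return;

	time_bench_run_concurrent(10000000, 0, NULL, cpu_online_mask, &sync,
				  cpu_tasks, time_bench_example_loop);
	time_bench_print_stats_cpumask("example_concurrent", cpu_tasks,
				       cpu_online_mask);
	kfree(cpu_tasks);
}
#endif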