/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Benchmarking code execution time inside the kernel
 *
 * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
 *  for licensing details see kernel-base/COPYING
 */
#ifndef _LINUX_TIME_BENCH_H
#define _LINUX_TIME_BENCH_H

#include <linux/types.h>	/* uint32_t, uint64_t, atomic_t */
#include <linux/bits.h>		/* BIT() */
#include <linux/completion.h>	/* struct completion */
#include <linux/cpumask.h>	/* struct cpumask */
#include <linux/sched.h>	/* struct task_struct */
#include <linux/time64.h>	/* struct timespec64 */
#include <linux/timekeeping.h>	/* ktime_get_real_ts64() */
#include <asm/msr.h>		/* rdmsrq_safe() */

/* Main structure used for recording a benchmark run */
struct time_bench_record {
	uint32_t version_abi;
	uint32_t loops;		/* Requested loop invocations */
	uint32_t step;		/* option for e.g. bulk invocations */

	uint32_t flags;		/* Measurement types enabled */
#define TIME_BENCH_LOOP		BIT(0)
#define TIME_BENCH_TSC		BIT(1)
#define TIME_BENCH_WALLCLOCK	BIT(2)
#define TIME_BENCH_PMU		BIT(3)

	uint32_t cpu;		/* Used when embedded in time_bench_cpu */

	/* Records */
	uint64_t invoked_cnt;	/* Returned actual invocations */
	uint64_t tsc_start;
	uint64_t tsc_stop;
	struct timespec64 ts_start;
	struct timespec64 ts_stop;
	/* PMU counters for instructions and cycles;
	 * the instructions counter includes pipelined instructions
	 */
	uint64_t pmc_inst_start;
	uint64_t pmc_inst_stop;
	/* CPU unhalted clock counter */
	uint64_t pmc_clk_start;
	uint64_t pmc_clk_stop;

	/* Result records */
	uint64_t tsc_interval;
	uint64_t time_start, time_stop, time_interval; /* in nanosec */
	uint64_t pmc_inst, pmc_clk;

	/* Derived result records */
	uint64_t tsc_cycles; // +decimal?
	uint64_t ns_per_call_quotient, ns_per_call_decimal;
	uint64_t time_sec;
	uint32_t time_sec_remainder;
	uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* instructions per cycle */
};

/* For synchronizing parallel CPUs to run concurrently */
struct time_bench_sync {
	atomic_t nr_tests_running;
	struct completion start_event;
};

/* Keep track of CPUs executing our bench function.
 *
 * Embed a time_bench_record for storing info per CPU
 */
struct time_bench_cpu {
	struct time_bench_record rec;
	struct time_bench_sync *sync;	/* back ptr */
	struct task_struct *task;
	/* The opaque "data" pointer could have been placed in
	 * time_bench_sync, but to avoid any false sharing, place it per CPU
	 */
	void *data;
	/* Support masking out some CPUs; mark if it ran */
	bool did_bench_run;
	/* int cpu; // note CPU stored in time_bench_record */
	int (*bench_func)(struct time_bench_record *record, void *data);
};
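/* Example bench_func callback (a minimal sketch; "example_empty_loop" is
 * a hypothetical name, not part of this API): the callback brackets its
 * own measurement with time_bench_start()/time_bench_stop() (defined
 * below) and returns the number of actual invocations, here for an
 * empty loop.
 *
 *  static int example_empty_loop(struct time_bench_record *rec, void *data)
 *  {
 *	uint64_t i;
 *
 *	time_bench_start(rec);
 *	for (i = 0; i < rec->loops; i++)
 *		barrier();	// keep the compiler from removing the loop
 *	time_bench_stop(rec, i);
 *	return i;
 *  }
 */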
/*
 * The TSC assembler code below is not compatible with other archs, and
 * can also fail on guests if the cpu-flags are not correct.
 *
 * The way the TSC reading is used here (many iterations) does not
 * require as high accuracy as described below (in Intel Doc #324264).
 *
 * Consider changing to use get_cycles() (#include <asm/timex.h>).
 */

/** TSC (Time-Stamp Counter) based **
 * Recommended reading, to understand the details of reading the TSC
 * accurately: Intel Doc #324264,
 *  "How to Benchmark Code Execution Times on Intel"
 *
 * Consider getting exclusive ownership of the CPU by using:
 *   unsigned long flags;
 *   preempt_disable();
 *   raw_local_irq_save(flags);
 *   _your_code_
 *   raw_local_irq_restore(flags);
 *   preempt_enable();
 *
 * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
 *  RDTSC only changes "%rax" and "%rdx", but
 *  CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
 */
static __always_inline uint64_t tsc_start_clock(void)
{
	/* See: Intel Doc #324264 */
	unsigned int hi, lo;

	asm volatile("CPUID\n\t"
		     "RDTSC\n\t"
		     "mov %%edx, %0\n\t"
		     "mov %%eax, %1\n\t"
		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
	//FIXME: on 32-bit use clobbered %eax + %edx
	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
}

static __always_inline uint64_t tsc_stop_clock(void)
{
	/* See: Intel Doc #324264 */
	unsigned int hi, lo;

	asm volatile("RDTSCP\n\t"
		     "mov %%edx, %0\n\t"
		     "mov %%eax, %1\n\t"
		     "CPUID\n\t"
		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
}

/** Wall-clock based **
 *
 * Used to use getnstimeofday():
 *   getnstimeofday(&rec->ts_start);
 *   getnstimeofday(&rec->ts_stop);
 *
 * The API changed, see: Documentation/core-api/timekeeping.rst
 * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
 *
 * ktime_get_real_ts64() is a direct replacement, but consider using
 * monotonic time (ktime_get_ts64()) and/or a ktime_t based interface
 * (ktime_get()/ktime_get_real()).
 */

/** PMU (Performance Monitor Unit) based **
 *
 * Needed for calculating: Instructions Per Cycle (IPC)
 * - The IPC number tells how efficiently the CPU pipeline was utilized
 */
//lookup: perf_event_create_kernel_counter()

bool time_bench_PMU_config(bool enable);

/* Raw reading via rdpmc() using fixed counters
 *
 * From: https://github.com/andikleen/simple-pmu
 */
enum {
	FIXED_SELECT = (1U << 30), /* == 0x40000000 */
	FIXED_INST_RETIRED_ANY = 0,
	FIXED_CPU_CLK_UNHALTED_CORE = 1,
	FIXED_CPU_CLK_UNHALTED_REF = 2,
};

static __always_inline unsigned long long p_rdpmc(unsigned int in)
{
	unsigned int d, a;

	asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
	return ((unsigned long long)d << 32) | a;
}

/* These PMU counters need to be enabled, but the configuration code is
 * not implemented. The current hack is running:
 *  sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
 */
/* Reading all pipelined instructions */
static __always_inline unsigned long long pmc_inst(void)
{
	return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
}

/* Reading CPU clock cycles */
static __always_inline unsigned long long pmc_clk(void)
{
	return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
}
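/* Sketch of the missing enable/configure step (an assumption, not the
 * actual time_bench_PMU_config() implementation): pinned kernel counters
 * created via perf_event_create_kernel_counter() keep the PMU counting;
 * whether the events land on the fixed counters read by p_rdpmc() above
 * is hardware and scheduler dependent. "pmu_enable_sketch" and the event
 * pointers are hypothetical names.
 *
 *  #include <linux/perf_event.h>
 *
 *  static struct perf_event *ev_inst, *ev_clk;
 *
 *  static bool pmu_enable_sketch(int cpu)
 *  {
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_HARDWARE,
 *		.size = sizeof(attr),
 *		.config = PERF_COUNT_HW_INSTRUCTIONS,
 *		.pinned = 1,
 *		.exclude_user = 1,	// like perf stat -e instructions:k
 *	};
 *
 *	ev_inst = perf_event_create_kernel_counter(&attr, cpu, NULL,
 *						   NULL, NULL);
 *	if (IS_ERR(ev_inst))
 *		return false;
 *
 *	attr.config = PERF_COUNT_HW_CPU_CYCLES;
 *	ev_clk = perf_event_create_kernel_counter(&attr, cpu, NULL,
 *						  NULL, NULL);
 *	if (IS_ERR(ev_clk)) {
 *		perf_event_release_kernel(ev_inst);
 *		return false;
 *	}
 *	return true;
 *  }
 */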
/* Raw reading of MSRs via rdmsr() is likely wrong
 * FIXME: How can I know which raw MSR registers are configured for what?
 */
#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
#define MSR_IA32_PCM2 0x400000C3
static inline uint64_t msr_inst(unsigned long long *msr_result)
{
	return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
}

/** Generic functions **
 */
bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
		     int (*func)(struct time_bench_record *rec, void *data));
bool time_bench_calc_stats(struct time_bench_record *rec);

void time_bench_run_concurrent(uint32_t loops, int step, void *data,
			       const struct cpumask *mask, /* Support masking out some CPUs */
			       struct time_bench_sync *sync,
			       struct time_bench_cpu *cpu_tasks,
			       int (*func)(struct time_bench_record *record, void *data));
void time_bench_print_stats_cpumask(const char *desc,
				    struct time_bench_cpu *cpu_tasks,
				    const struct cpumask *mask);

//FIXME: use rec->flags to select measurement, should be MACRO
static __always_inline void time_bench_start(struct time_bench_record *rec)
{
	//getnstimeofday(&rec->ts_start);
	ktime_get_real_ts64(&rec->ts_start);
	if (rec->flags & TIME_BENCH_PMU) {
		rec->pmc_inst_start = pmc_inst();
		rec->pmc_clk_start = pmc_clk();
	}
	rec->tsc_start = tsc_start_clock();
}

static __always_inline void time_bench_stop(struct time_bench_record *rec,
					    uint64_t invoked_cnt)
{
	rec->tsc_stop = tsc_stop_clock();
	if (rec->flags & TIME_BENCH_PMU) {
		rec->pmc_inst_stop = pmc_inst();
		rec->pmc_clk_stop = pmc_clk();
	}
	//getnstimeofday(&rec->ts_stop);
	ktime_get_real_ts64(&rec->ts_stop);
	rec->invoked_cnt = invoked_cnt;
}

#endif /* _LINUX_TIME_BENCH_H */
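/* Example invocation from a module (a sketch; "my_bench" stands in for a
 * bench_func callback like the example near the top of this file, and the
 * NR_CPUS-sized static array is only illustrative, callers may instead
 * allocate per possible CPU):
 *
 *  static struct time_bench_sync sync;
 *  static struct time_bench_cpu cpu_tasks[NR_CPUS];
 *
 *  // Single-CPU run:
 *  time_bench_loop(1000000, 0, "my_bench", NULL, my_bench);
 *
 *  // Concurrent run on all online CPUs:
 *  time_bench_run_concurrent(1000000, 0, NULL, cpu_online_mask,
 *			      &sync, cpu_tasks, my_bench);
 *  time_bench_print_stats_cpumask("my_bench", cpu_tasks, cpu_online_mask);
 */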