// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,	/* reserved */
			brsmen:1,	/* branch sample enable */
			rsvd4_3:2,	/* reserved - must be 0x3 */
			vb:1,		/* valid branches recorded */
			rsvd2:10,	/* reserved */
			msroff:4,	/* index of next entry to write */
			rsvd3:4,	/* reserved */
			pmc:3,		/* #PMC holding the sampling event */
			rsvd4:37;	/* reserved */
	};
};

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}

static inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
}

static inline u64 get_debug_extn_cfg(void)
{
	u64 val;

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
	return val;
}

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * Current BRS implementation does not support branch type or privilege level
 * filtering. Therefore, this function simply enforces these limitations. No need
 * for a br_sel_map. Software filtering is not supported because it would not
 * correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * Due to interrupt holding, BRS is not recommended in
	 * counting mode.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period is even
	 * looser:
	 *
	 * With retired taken branch:
	 *   Effective P = P + 16 + X
	 * With any other event:
	 *   Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the number of occurrences of the event while BRS
	 * is capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;

	/*
	 * The kernel subtracts BRS depth from period, so it must
	 * be big enough.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}
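
/*
 * Illustrative sketch only (not used by this file): a perf_event_attr that
 * satisfies the checks above would look roughly like the following, assuming
 * the raw event encodes retired taken branches (AMD_FAM19H_BRS_EVENT) and
 * lbr_nr == 16; the period value is arbitrary:
 *
 *	attr.type               = PERF_TYPE_RAW;
 *	attr.config             = <retired taken branches / AMD_FAM19H_BRS_EVENT>;
 *	attr.freq               = 0;			// no frequency mode
 *	attr.sample_period      = 100003;		// must be > x86_pmu.lbr_nr
 *	attr.sample_type        = PERF_SAMPLE_BRANCH_STACK;
 *	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;  // optionally ORed with PLM bits
 */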

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff: index of next entry to write so top-of-stack is one off
	 * if BRS is full then msroff is set back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}

/*
 * make sure we have a sane BRS offset to begin with
 * especially with kexec
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in on interrupt and BRS is full, then hw will have
	 * already stopped BRS, no need to issue wrmsr again
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return false;

	if (!(type & plm_u) && !kernel_ip(to))
		return false;

	return true;
}
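
/*
 * For example, an event opened with branch_sample_type containing only
 * PERF_SAMPLE_BRANCH_USER keeps entries whose target is a user address and
 * drops those whose target is a kernel address. Only the branch target (to)
 * is checked; the source address is not considered.
 */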

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * BRS event forced on PMC0,
	 * so check if there is an event.
	 * It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to long latency PMU irq
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check [0-x86_pmu.lbr_nr] */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msroff points to next entry to be written
	 * tos = most recent entry index = msroff - 1
	 * BRS register buffer saturates, so we know we have
	 * start < tos and that we have to read from start to tos
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1
	 * MSROFF wraps to zero when buffer is full
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
		to = (u64)(((s64)to << shift) >> shift);

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br + nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}

/*
 * Poison most recent entry to prevent reuse by next task,
 * required because BRS entries are not tagged by PID
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}
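
/*
 * Note: poisoning only the most recent target is sufficient because
 * amd_brs_drain() walks the buffer from the top of stack downward and stops
 * at the first BRS_POISON target it reads, so all older entries left by the
 * previous task are hidden as well.
 */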

/*
 * On context switch in, we need to make sure no samples from a previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started
 * On ctxswout, sched_in = false, called before the PMU is stopped
 */
void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from the previous BRS user on that CPU, so we poison the buffer as
	 * a faster way compared to resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * called from ACPI processor_idle.c or acpi_pad.c
 * with interrupts disabled
 */
void perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state.
	 * We must disable branch sampling to avoid holding the NMI
	 * for too long. We disable it in hardware but we
	 * keep the state in cpuc, so we can re-enable.
	 *
	 * The hardware will deliver the NMI if needed when brsmen is cleared.
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
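
/*
 * Wiring sketch (assumption, the call sites live outside this file): once
 * amd_brs_lopwr_init() has updated the static call, the ACPI idle code
 * mentioned above is expected to reach perf_amd_brs_lopwr_cb() through the
 * perf_lopwr_cb static call machinery, roughly:
 *
 *	static_call(perf_lopwr_cb)(true);	// entering a low-power (non-C0) state
 *	...
 *	static_call(perf_lopwr_cb)(false);	// back in C0, BRS re-enabled if it was active
 */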