// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branch is not recorded
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Indirect relative calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0

#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:5;
			u64	mispredict:1;
		} split;
		u64	full;
	} from;

	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:3;
			u64	reserved:1;
			u64	spec:1;
			u64	valid:1;
		} split;
		u64	full;
	} to;
};

static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
	u64 val;

	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

	return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
	u64 val;

	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

	return val;
}

static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	return (u64)(((s64)ip << shift) >> shift);
}

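/*
 * Editor's note: a worked example of the sign extension above, under the
 * assumption that boot_cpu_data.x86_virt_bits is 48 on the running part
 * (it may also be 57 with 5-level paging). With 48 virtual address bits,
 * shift = 64 - 48 = 16, so a raw 58-bit record value of 0x03ffffff81000000
 * is widened as
 *
 *	((s64)0x03ffffff81000000 << 16) >> 16 = 0xffffffff81000000
 *
 * i.e. bit (x86_virt_bits - 1) is replicated into the upper bits to recover
 * the canonical virtual address of the branch.
 */
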
static void amd_pmu_lbr_filter(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int br_sel = cpuc->br_sel, offset, type, i, j;
	bool compress = false;
	bool fused_only = false;
	u64 from, to;

	/* If sampling all branches, there is nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		fused_only = true;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = branch_type_fused(from, to, 0, &offset);

		/*
		 * Adjust the branch from address in case of instruction
		 * fusion where it points to an instruction preceding the
		 * actual branch
		 */
		if (offset) {
			cpuc->lbr_entries[i].from += offset;
			if (fused_only)
				continue;
		}

		/* If type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* Remove all invalid entries */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
	PERF_BR_SPEC_NA,
	PERF_BR_SPEC_WRONG_PATH,
	PERF_BR_NON_SPEC_CORRECT_PATH,
	PERF_BR_SPEC_CORRECT_PATH,
};

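/*
 * Editor's note: amd_pmu_lbr_read() below indexes this table with
 * idx = (valid << 1) | spec taken from the "to" record, so the slots map as
 * follows (the valid = 0, spec = 0 slot is never reached because such
 * entries are skipped before the lookup):
 *
 *	valid  spec  idx  reported speculation info
 *	  0      0    0   PERF_BR_SPEC_NA (entry discarded earlier)
 *	  0      1    1   PERF_BR_SPEC_WRONG_PATH
 *	  1      0    2   PERF_BR_NON_SPEC_CORRECT_PATH
 *	  1      1    3   PERF_BR_SPEC_CORRECT_PATH
 */
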
void amd_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_branch_entry *br = cpuc->lbr_entries;
	struct branch_entry entry;
	int out = 0, idx, i;

	if (!cpuc->lbr_users)
		return;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entry.from.full = amd_pmu_lbr_get_from(i);
		entry.to.full = amd_pmu_lbr_get_to(i);

		/*
		 * Check if a branch has been logged; if valid = 0, spec = 0
		 * then no branch was recorded; if reserved = 1 then an
		 * erroneous branch was recorded (see Erratum 1452)
		 */
		if ((!entry.to.split.valid && !entry.to.split.spec) ||
		    entry.to.split.reserved)
			continue;

		perf_clear_branch_entry_bitfields(br + out);

		br[out].from = sign_ext_branch_ip(entry.from.split.ip);
		br[out].to = sign_ext_branch_ip(entry.to.split.ip);
		br[out].mispred = entry.from.split.mispredict;
		br[out].predicted = !br[out].mispred;

		/*
		 * Set branch speculation information using the status of
		 * the valid and spec bits.
		 *
		 * When valid = 0, spec = 0, no branch was recorded and the
		 * entry is discarded as seen above.
		 *
		 * When valid = 0, spec = 1, the recorded branch was
		 * speculative but took the wrong path.
		 *
		 * When valid = 1, spec = 0, the recorded branch was
		 * non-speculative but took the correct path.
		 *
		 * When valid = 1, spec = 1, the recorded branch was
		 * speculative and took the correct path.
		 */
		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
		br[out].spec = lbr_spec_map[idx];
		out++;
	}

	cpuc->lbr_stack.nr = out;

	/*
	 * Internal register renaming ensures that LBR From[0] and
	 * LBR To[0] always represent the TOS
	 */
	cpuc->lbr_stack.hw_idx = 0;

	/* Perform further software filtering */
	amd_pmu_lbr_filter();
}

static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,

	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,

	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,

	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	/* No LBR support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* Ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	reg->reg = mask;
	mask = 0;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & BIT_ULL(i)))
			continue;

		v = lbr_select_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGNORE)
			mask |= v;
	}

	/* Filter bits operate in suppress mode */
	reg->config = mask ^ LBR_SELECT_MASK;

	return 0;
}

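/*
 * Editor's note: a worked example of the suppress-mode conversion above.
 * For branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
 * PERF_SAMPLE_BRANCH_ANY_RETURN, the loop accumulates
 *
 *	mask = LBR_KERNEL | LBR_RETURN | LBR_FAR = 0x121
 *
 * and the value later programmed into LBR Branch Select is
 *
 *	reg->config = 0x121 ^ LBR_SELECT_MASK = 0x0de
 *
 * i.e. every other filter bit (user-mode branches, Jcc, near calls and near
 * jumps) is set, suppressing those branch types so that only kernel-mode
 * near returns and far branches are recorded by hardware.
 */
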
int amd_pmu_lbr_hw_config(struct perf_event *event)
{
	int ret = 0;

	/* LBR is not recommended in counting mode */
	if (!is_sampling_event(event))
		return -EINVAL;

	ret = amd_pmu_lbr_setup_filter(event);
	if (!ret)
		event->attach_state |= PERF_ATTACH_SCHED_CB;

	return ret;
}

void amd_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	if (!x86_pmu.lbr_nr)
		return;

	/* Reset all branch records individually */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		amd_pmu_lbr_set_from(i, 0);
		amd_pmu_lbr_set_to(i, 0);
	}

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event)) {
		cpuc->lbr_select = 1;
		cpuc->lbr_sel->config = reg->config;
		cpuc->br_sel = reg->reg;
	}

	perf_sched_cb_inc(event->pmu);

	if (!cpuc->lbr_users++ && !event->total_time_running)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event))
		cpuc->lbr_select = 0;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * A context switch can flip the address space and LBR entries are
	 * not tagged with an identifier. Hence, branches cannot be resolved
	 * from the old address space and the LBR records should be wiped.
	 */
	if (cpuc->lbr_users && sched_in)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 lbr_select, dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	/* Set hardware branch filter */
	if (cpuc->lbr_select) {
		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
	}

	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	}

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);

	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	}
}

__init int amd_pmu_lbr_init(void)
{
	union cpuid_0x80000022_ebx ebx;

	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
		return -EOPNOTSUPP;

	/* Set number of entries */
	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

	return 0;
}

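/*
 * Editor's note: a hedged usage sketch, not part of the driver itself. From
 * user space these records are requested with a sampling event that sets
 * PERF_SAMPLE_BRANCH_STACK, for example:
 *
 *	struct perf_event_attr attr = {
 *		.type			= PERF_TYPE_HARDWARE,
 *		.config			= PERF_COUNT_HW_CPU_CYCLES,
 *		.sample_period		= 100000,
 *		.sample_type		= PERF_SAMPLE_BRANCH_STACK,
 *		.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY |
 *					  PERF_SAMPLE_BRANCH_USER,
 *	};
 *
 * The requested branch_sample_type reaches amd_pmu_lbr_hw_config() above;
 * requests that map to LBR_NOT_SUPP in lbr_select_map (e.g.
 * PERF_SAMPLE_BRANCH_CALL_STACK) are rejected with -EOPNOTSUPP.
 */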