// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/msr.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK                 0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding types of branches are not recorded
 */
#define LBR_SELECT_KERNEL               0       /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER                 1       /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC                  2       /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL        3       /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND        4       /* Near indirect calls */
#define LBR_SELECT_RET_NEAR             5       /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND         6       /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL         7       /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH           8       /* Far branches */

#define LBR_KERNEL      BIT(LBR_SELECT_KERNEL)
#define LBR_USER        BIT(LBR_SELECT_USER)
#define LBR_JCC         BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL    BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL    BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN      BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP     BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP     BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR         BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP    -1      /* unsupported filter */
#define LBR_IGNORE      0

#define LBR_ANY         \
        (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |  \
         LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:5;
                        u64     mispredict:1;
                } split;
                u64     full;
        } from;

        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:3;
                        u64     reserved:1;
                        u64     spec:1;
                        u64     valid:1;
                } split;
                u64     full;
        } to;
};

/* From[idx] and To[idx] MSRs are interleaved: From at even, To at odd offsets */
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
        wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
        wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
        u64 val;

        rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

        return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
        u64 val;

        rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

        return val;
}

/* Sign-extend the recorded branch IP to a canonical virtual address */
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        return (u64)(((s64)ip << shift) >> shift);
}

static void amd_pmu_lbr_filter(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        bool fused_only = false;
        u64 from, to;

        /* If sampling all branches, there is nothing to filter */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                fused_only = true;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = branch_type_fused(from, to, 0, &offset);

                /*
                 * Adjust the branch from address in case of instruction
                 * fusion where it points to an instruction preceding the
                 * actual branch
                 */
                if (offset) {
                        cpuc->lbr_entries[i].from += offset;
                        if (fused_only)
                                continue;
                }

                /* If type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0;  /* mark invalid */
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /* Remove all invalid entries */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}

/* Indexed by (valid << 1) | spec, as computed in amd_pmu_lbr_read() */
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
        PERF_BR_SPEC_NA,
        PERF_BR_SPEC_WRONG_PATH,
        PERF_BR_NON_SPEC_CORRECT_PATH,
        PERF_BR_SPEC_CORRECT_PATH,
};

void amd_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_branch_entry *br = cpuc->lbr_entries;
        struct branch_entry entry;
        int out = 0, idx, i;

        if (!cpuc->lbr_users)
                return;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                entry.from.full = amd_pmu_lbr_get_from(i);
                entry.to.full = amd_pmu_lbr_get_to(i);

                /*
                 * Check if a branch has been logged; if valid = 0, spec = 0
                 * then no branch was recorded; if reserved = 1 then an
                 * erroneous branch was recorded (see Erratum 1452)
                 */
                if ((!entry.to.split.valid && !entry.to.split.spec) ||
                    entry.to.split.reserved)
                        continue;

                perf_clear_branch_entry_bitfields(br + out);

                br[out].from = sign_ext_branch_ip(entry.from.split.ip);
                br[out].to = sign_ext_branch_ip(entry.to.split.ip);
                br[out].mispred = entry.from.split.mispredict;
                br[out].predicted = !br[out].mispred;

                /*
                 * Set branch speculation information using the status of
                 * the valid and spec bits.
                 *
                 * When valid = 0, spec = 0, no branch was recorded and the
                 * entry is discarded as seen above.
                 *
                 * When valid = 0, spec = 1, the recorded branch was
                 * speculative but took the wrong path.
                 *
                 * When valid = 1, spec = 0, the recorded branch was
                 * non-speculative but took the correct path.
                 *
                 * When valid = 1, spec = 1, the recorded branch was
                 * speculative and took the correct path.
                 */
                idx = (entry.to.split.valid << 1) | entry.to.split.spec;
                br[out].spec = lbr_spec_map[idx];
                out++;
        }

        cpuc->lbr_stack.nr = out;

        /*
         * Internal register renaming ensures that LBR From[0] and LBR To[0]
         * always represent the TOS
         */
        cpuc->lbr_stack.hw_idx = 0;

        /* Perform further software filtering */
        amd_pmu_lbr_filter();
}

static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGNORE,

        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IN_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,

        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,

        [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]    = LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        /* No LBR support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* Ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        reg->reg = mask;
        mask = 0;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & BIT_ULL(i)))
                        continue;

                v = lbr_select_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGNORE)
                        mask |= v;
        }

        /* Filter bits operate in suppress mode */
        reg->config = mask ^ LBR_SELECT_MASK;

        return 0;
}

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
        int ret = 0;

        ret = amd_pmu_lbr_setup_filter(event);
        if (!ret)
                event->attach_state |= PERF_ATTACH_SCHED_CB;

        return ret;
}

void amd_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int i;

        if (!x86_pmu.lbr_nr)
                return;

        /* Reset all branch records individually */
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                amd_pmu_lbr_set_from(i, 0);
                amd_pmu_lbr_set_to(i, 0);
        }

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        wrmsrq(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event)) {
                cpuc->lbr_select = 1;
                cpuc->lbr_sel->config = reg->config;
                cpuc->br_sel = reg->reg;
        }

        perf_sched_cb_inc(event->pmu);

        if (!cpuc->lbr_users++ && !event->total_time_running)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event))
                cpuc->lbr_select = 0;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
                            struct task_struct *task, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * A context switch can flip the address space and LBR entries are
         * not tagged with an identifier. Hence, branches cannot be resolved
         * from the old address space and the LBR records should be wiped.
         */
        if (cpuc->lbr_users && sched_in)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 lbr_select, dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        /* Set hardware branch filter */
        if (cpuc->lbr_select) {
                lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
                wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select);
        }

        if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
                rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
                wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        }

        rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        __amd_pmu_lbr_disable();
}

__init int amd_pmu_lbr_init(void)
{
        union cpuid_0x80000022_ebx ebx;

        if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
                return -EOPNOTSUPP;

        /* Set number of entries */
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
        x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

        pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

        return 0;
}