1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/perf_event.h> 3 #include <linux/types.h> 4 5 #include <asm/perf_event.h> 6 #include <asm/msr.h> 7 8 #include "../perf_event.h" 9 10 /* 11 * Intel LBR_SELECT bits 12 * Intel Vol3a, April 2011, Section 16.7 Table 16-10 13 * 14 * Hardware branch filter (not available on all CPUs) 15 */ 16 #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ 17 #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ 18 #define LBR_JCC_BIT 2 /* do not capture conditional branches */ 19 #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ 20 #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ 21 #define LBR_RETURN_BIT 5 /* do not capture near returns */ 22 #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ 23 #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ 24 #define LBR_FAR_BIT 8 /* do not capture far branches */ 25 #define LBR_CALL_STACK_BIT 9 /* enable call stack */ 26 27 /* 28 * Following bit only exists in Linux; we mask it out before writing it to 29 * the actual MSR. But it helps the constraint perf code to understand 30 * that this is a separate configuration. 31 */ 32 #define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */ 33 34 #define LBR_KERNEL (1 << LBR_KERNEL_BIT) 35 #define LBR_USER (1 << LBR_USER_BIT) 36 #define LBR_JCC (1 << LBR_JCC_BIT) 37 #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) 38 #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) 39 #define LBR_RETURN (1 << LBR_RETURN_BIT) 40 #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) 41 #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) 42 #define LBR_FAR (1 << LBR_FAR_BIT) 43 #define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) 44 #define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) 45 46 #define LBR_PLM (LBR_KERNEL | LBR_USER) 47 48 #define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ 49 #define LBR_NOT_SUPP -1 /* LBR filter not supported */ 50 #define LBR_IGN 0 /* ignored */ 51 52 #define LBR_ANY \ 53 (LBR_JCC |\ 54 LBR_REL_CALL |\ 55 LBR_IND_CALL |\ 56 LBR_RETURN |\ 57 LBR_REL_JMP |\ 58 LBR_IND_JMP |\ 59 LBR_FAR) 60 61 #define LBR_FROM_FLAG_MISPRED BIT_ULL(63) 62 #define LBR_FROM_FLAG_IN_TX BIT_ULL(62) 63 #define LBR_FROM_FLAG_ABORT BIT_ULL(61) 64 65 #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) 66 67 /* 68 * Intel LBR_CTL bits 69 * 70 * Hardware branch filter for Arch LBR 71 */ 72 #define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */ 73 #define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */ 74 #define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */ 75 #define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */ 76 #define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */ 77 #define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */ 78 #define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */ 79 #define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */ 80 #define ARCH_LBR_RETURN_BIT 21 /* capture near returns */ 81 #define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */ 82 83 #define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT) 84 #define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT) 85 #define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT) 86 #define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT) 87 #define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT) 88 #define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT) 89 #define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT) 90 #define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT) 91 #define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT) 92 #define ARCH_LBR_OTHER_BRANCH (1ULL << 
ARCH_LBR_OTHER_BRANCH_BIT) 93 94 #define ARCH_LBR_ANY \ 95 (ARCH_LBR_JCC |\ 96 ARCH_LBR_REL_JMP |\ 97 ARCH_LBR_IND_JMP |\ 98 ARCH_LBR_REL_CALL |\ 99 ARCH_LBR_IND_CALL |\ 100 ARCH_LBR_RETURN |\ 101 ARCH_LBR_OTHER_BRANCH) 102 103 #define ARCH_LBR_CTL_MASK 0x7f000e 104 105 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); 106 107 static __always_inline bool is_lbr_call_stack_bit_set(u64 config) 108 { 109 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 110 return !!(config & ARCH_LBR_CALL_STACK); 111 112 return !!(config & LBR_CALL_STACK); 113 } 114 115 /* 116 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI 117 * otherwise it becomes near impossible to get a reliable stack. 118 */ 119 120 static void __intel_pmu_lbr_enable(bool pmi) 121 { 122 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 123 u64 debugctl, lbr_select = 0, orig_debugctl; 124 125 /* 126 * No need to unfreeze manually, as v4 can do that as part 127 * of the GLOBAL_STATUS ack. 128 */ 129 if (pmi && x86_pmu.version >= 4) 130 return; 131 132 /* 133 * No need to reprogram LBR_SELECT in a PMI, as it 134 * did not change. 135 */ 136 if (cpuc->lbr_sel) 137 lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; 138 if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) 139 wrmsrl(MSR_LBR_SELECT, lbr_select); 140 141 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 142 orig_debugctl = debugctl; 143 144 if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 145 debugctl |= DEBUGCTLMSR_LBR; 146 /* 147 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. 148 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions 149 * may cause superfluous increase/decrease of LBR_TOS. 150 */ 151 if (is_lbr_call_stack_bit_set(lbr_select)) 152 debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 153 else 154 debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 155 156 if (orig_debugctl != debugctl) 157 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 158 159 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 160 wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); 161 } 162 163 void intel_pmu_lbr_reset_32(void) 164 { 165 int i; 166 167 for (i = 0; i < x86_pmu.lbr_nr; i++) 168 wrmsrl(x86_pmu.lbr_from + i, 0); 169 } 170 171 void intel_pmu_lbr_reset_64(void) 172 { 173 int i; 174 175 for (i = 0; i < x86_pmu.lbr_nr; i++) { 176 wrmsrl(x86_pmu.lbr_from + i, 0); 177 wrmsrl(x86_pmu.lbr_to + i, 0); 178 if (x86_pmu.lbr_has_info) 179 wrmsrl(x86_pmu.lbr_info + i, 0); 180 } 181 } 182 183 static void intel_pmu_arch_lbr_reset(void) 184 { 185 /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ 186 wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); 187 } 188 189 void intel_pmu_lbr_reset(void) 190 { 191 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 192 193 if (!x86_pmu.lbr_nr) 194 return; 195 196 x86_pmu.lbr_reset(); 197 198 cpuc->last_task_ctx = NULL; 199 cpuc->last_log_id = 0; 200 if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) 201 wrmsrl(MSR_LBR_SELECT, 0); 202 } 203 204 /* 205 * TOS = most recently recorded branch 206 */ 207 static inline u64 intel_pmu_lbr_tos(void) 208 { 209 u64 tos; 210 211 rdmsrl(x86_pmu.lbr_tos, tos); 212 return tos; 213 } 214 215 enum { 216 LBR_NONE, 217 LBR_VALID, 218 }; 219 220 /* 221 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x 222 * are the TSX flags when TSX is supported, but when TSX is not supported 223 * they have no consistent behavior: 224 * 225 * - For wrmsr(), bits 61:62 are considered part of the sign extension. 
226 * - For HW updates (branch captures) bits 61:62 are always OFF and are not 227 * part of the sign extension. 228 * 229 * Therefore, if: 230 * 231 * 1) LBR format LBR_FORMAT_EIP_FLAGS2 232 * 2) CPU has no TSX support enabled 233 * 234 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any 235 * value from rdmsr() must be converted to have a 61 bits sign extension, 236 * ignoring the TSX flags. 237 */ 238 static inline bool lbr_from_signext_quirk_needed(void) 239 { 240 bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || 241 boot_cpu_has(X86_FEATURE_RTM); 242 243 return !tsx_support; 244 } 245 246 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); 247 248 /* If quirk is enabled, ensure sign extension is 63 bits: */ 249 inline u64 lbr_from_signext_quirk_wr(u64 val) 250 { 251 if (static_branch_unlikely(&lbr_from_quirk_key)) { 252 /* 253 * Sign extend into bits 61:62 while preserving bit 63. 254 * 255 * Quirk is enabled when TSX is disabled. Therefore TSX bits 256 * in val are always OFF and must be changed to be sign 257 * extension bits. Since bits 59:60 are guaranteed to be 258 * part of the sign extension bits, we can just copy them 259 * to 61:62. 260 */ 261 val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; 262 } 263 return val; 264 } 265 266 /* 267 * If quirk is needed, ensure sign extension is 61 bits: 268 */ 269 static u64 lbr_from_signext_quirk_rd(u64 val) 270 { 271 if (static_branch_unlikely(&lbr_from_quirk_key)) { 272 /* 273 * Quirk is on when TSX is not enabled. Therefore TSX 274 * flags must be read as OFF. 275 */ 276 val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); 277 } 278 return val; 279 } 280 281 static __always_inline void wrlbr_from(unsigned int idx, u64 val) 282 { 283 val = lbr_from_signext_quirk_wr(val); 284 wrmsrl(x86_pmu.lbr_from + idx, val); 285 } 286 287 static __always_inline void wrlbr_to(unsigned int idx, u64 val) 288 { 289 wrmsrl(x86_pmu.lbr_to + idx, val); 290 } 291 292 static __always_inline void wrlbr_info(unsigned int idx, u64 val) 293 { 294 wrmsrl(x86_pmu.lbr_info + idx, val); 295 } 296 297 static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) 298 { 299 u64 val; 300 301 if (lbr) 302 return lbr->from; 303 304 rdmsrl(x86_pmu.lbr_from + idx, val); 305 306 return lbr_from_signext_quirk_rd(val); 307 } 308 309 static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) 310 { 311 u64 val; 312 313 if (lbr) 314 return lbr->to; 315 316 rdmsrl(x86_pmu.lbr_to + idx, val); 317 318 return val; 319 } 320 321 static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) 322 { 323 u64 val; 324 325 if (lbr) 326 return lbr->info; 327 328 rdmsrl(x86_pmu.lbr_info + idx, val); 329 330 return val; 331 } 332 333 static inline void 334 wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 335 { 336 wrlbr_from(idx, lbr->from); 337 wrlbr_to(idx, lbr->to); 338 if (need_info) 339 wrlbr_info(idx, lbr->info); 340 } 341 342 static inline bool 343 rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 344 { 345 u64 from = rdlbr_from(idx, NULL); 346 347 /* Don't read invalid entry */ 348 if (!from) 349 return false; 350 351 lbr->from = from; 352 lbr->to = rdlbr_to(idx, NULL); 353 if (need_info) 354 lbr->info = rdlbr_info(idx, NULL); 355 356 return true; 357 } 358 359 void intel_pmu_lbr_restore(void *ctx) 360 { 361 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 362 struct x86_perf_task_context *task_ctx = ctx; 363 bool need_info = x86_pmu.lbr_has_info; 364 u64 tos = 
task_ctx->tos; 365 unsigned lbr_idx, mask; 366 int i; 367 368 mask = x86_pmu.lbr_nr - 1; 369 for (i = 0; i < task_ctx->valid_lbrs; i++) { 370 lbr_idx = (tos - i) & mask; 371 wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info); 372 } 373 374 for (; i < x86_pmu.lbr_nr; i++) { 375 lbr_idx = (tos - i) & mask; 376 wrlbr_from(lbr_idx, 0); 377 wrlbr_to(lbr_idx, 0); 378 if (need_info) 379 wrlbr_info(lbr_idx, 0); 380 } 381 382 wrmsrl(x86_pmu.lbr_tos, tos); 383 384 if (cpuc->lbr_select) 385 wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 386 } 387 388 static void intel_pmu_arch_lbr_restore(void *ctx) 389 { 390 struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 391 struct lbr_entry *entries = task_ctx->entries; 392 int i; 393 394 /* Fast reset the LBRs before restore if the call stack is not full. */ 395 if (!entries[x86_pmu.lbr_nr - 1].from) 396 intel_pmu_arch_lbr_reset(); 397 398 for (i = 0; i < x86_pmu.lbr_nr; i++) { 399 if (!entries[i].from) 400 break; 401 wrlbr_all(&entries[i], i, true); 402 } 403 } 404 405 /* 406 * Restore the Architecture LBR state from the xsave area in the perf 407 * context data for the task via the XRSTORS instruction. 408 */ 409 static void intel_pmu_arch_lbr_xrstors(void *ctx) 410 { 411 struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 412 413 xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); 414 } 415 416 static __always_inline bool lbr_is_reset_in_cstate(void *ctx) 417 { 418 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 419 return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); 420 421 return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); 422 } 423 424 static void __intel_pmu_lbr_restore(void *ctx) 425 { 426 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 427 428 if (task_context_opt(ctx)->lbr_callstack_users == 0 || 429 task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { 430 intel_pmu_lbr_reset(); 431 return; 432 } 433 434 /* 435 * Does not restore the LBR registers, if 436 * - No one else touched them, and 437 * - Was not cleared in Cstate 438 */ 439 if ((ctx == cpuc->last_task_ctx) && 440 (task_context_opt(ctx)->log_id == cpuc->last_log_id) && 441 !lbr_is_reset_in_cstate(ctx)) { 442 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 443 return; 444 } 445 446 x86_pmu.lbr_restore(ctx); 447 448 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 449 } 450 451 void intel_pmu_lbr_save(void *ctx) 452 { 453 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 454 struct x86_perf_task_context *task_ctx = ctx; 455 bool need_info = x86_pmu.lbr_has_info; 456 unsigned lbr_idx, mask; 457 u64 tos; 458 int i; 459 460 mask = x86_pmu.lbr_nr - 1; 461 tos = intel_pmu_lbr_tos(); 462 for (i = 0; i < x86_pmu.lbr_nr; i++) { 463 lbr_idx = (tos - i) & mask; 464 if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info)) 465 break; 466 } 467 task_ctx->valid_lbrs = i; 468 task_ctx->tos = tos; 469 470 if (cpuc->lbr_select) 471 rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 472 } 473 474 static void intel_pmu_arch_lbr_save(void *ctx) 475 { 476 struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 477 struct lbr_entry *entries = task_ctx->entries; 478 int i; 479 480 for (i = 0; i < x86_pmu.lbr_nr; i++) { 481 if (!rdlbr_all(&entries[i], i, true)) 482 break; 483 } 484 485 /* LBR call stack is not full. Reset is required in restore. */ 486 if (i < x86_pmu.lbr_nr) 487 entries[x86_pmu.lbr_nr - 1].from = 0; 488 } 489 490 /* 491 * Save the Architecture LBR state to the xsave area in the perf 492 * context data for the task via the XSAVES instruction. 
493 */ 494 static void intel_pmu_arch_lbr_xsaves(void *ctx) 495 { 496 struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 497 498 xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); 499 } 500 501 static void __intel_pmu_lbr_save(void *ctx) 502 { 503 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 504 505 if (task_context_opt(ctx)->lbr_callstack_users == 0) { 506 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 507 return; 508 } 509 510 x86_pmu.lbr_save(ctx); 511 512 task_context_opt(ctx)->lbr_stack_state = LBR_VALID; 513 514 cpuc->last_task_ctx = ctx; 515 cpuc->last_log_id = ++task_context_opt(ctx)->log_id; 516 } 517 518 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, 519 struct perf_event_pmu_context *next_epc) 520 { 521 void *prev_ctx_data, *next_ctx_data; 522 523 swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); 524 525 /* 526 * Architecture specific synchronization makes sense in case 527 * both prev_epc->task_ctx_data and next_epc->task_ctx_data 528 * pointers are allocated. 529 */ 530 531 prev_ctx_data = next_epc->task_ctx_data; 532 next_ctx_data = prev_epc->task_ctx_data; 533 534 if (!prev_ctx_data || !next_ctx_data) 535 return; 536 537 swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, 538 task_context_opt(next_ctx_data)->lbr_callstack_users); 539 } 540 541 void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 542 { 543 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 544 void *task_ctx; 545 546 if (!cpuc->lbr_users) 547 return; 548 549 /* 550 * If LBR callstack feature is enabled and the stack was saved when 551 * the task was scheduled out, restore the stack. Otherwise flush 552 * the LBR stack. 553 */ 554 task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; 555 if (task_ctx) { 556 if (sched_in) 557 __intel_pmu_lbr_restore(task_ctx); 558 else 559 __intel_pmu_lbr_save(task_ctx); 560 return; 561 } 562 563 /* 564 * Since a context switch can flip the address space and LBR entries 565 * are not tagged with an identifier, we need to wipe the LBR, even for 566 * per-cpu events. You simply cannot resolve the branches from the old 567 * address space. 568 */ 569 if (sched_in) 570 intel_pmu_lbr_reset(); 571 } 572 573 static inline bool branch_user_callstack(unsigned br_sel) 574 { 575 return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); 576 } 577 578 void intel_pmu_lbr_add(struct perf_event *event) 579 { 580 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 581 582 if (!x86_pmu.lbr_nr) 583 return; 584 585 if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) 586 cpuc->lbr_select = 1; 587 588 cpuc->br_sel = event->hw.branch_reg.reg; 589 590 if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) 591 task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; 592 593 /* 594 * Request pmu::sched_task() callback, which will fire inside the 595 * regular perf event scheduling, so that call will: 596 * 597 * - restore or wipe; when LBR-callstack, 598 * - wipe; otherwise, 599 * 600 * when this is from __perf_event_task_sched_in(). 601 * 602 * However, if this is from perf_install_in_context(), no such callback 603 * will follow and we'll need to reset the LBR here if this is the 604 * first LBR event. 605 * 606 * The problem is, we cannot tell these cases apart... but we can 607 * exclude the biggest chunk of cases by looking at 608 * event->total_time_running. An event that has accrued runtime cannot 609 * be 'new'. 
Conversely, a new event can get installed through the
610 * context switch path for the first time.
611 */
612 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
613 cpuc->lbr_pebs_users++;
614 perf_sched_cb_inc(event->pmu);
615 if (!cpuc->lbr_users++ && !event->total_time_running)
616 intel_pmu_lbr_reset();
617 }
618
619 void release_lbr_buffers(void)
620 {
621 struct kmem_cache *kmem_cache;
622 struct cpu_hw_events *cpuc;
623 int cpu;
624
625 if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
626 return;
627
628 for_each_possible_cpu(cpu) {
629 cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
630 kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
631 if (kmem_cache && cpuc->lbr_xsave) {
632 kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
633 cpuc->lbr_xsave = NULL;
634 }
635 }
636 }
637
638 void reserve_lbr_buffers(void)
639 {
640 struct kmem_cache *kmem_cache;
641 struct cpu_hw_events *cpuc;
642 int cpu;
643
644 if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
645 return;
646
647 for_each_possible_cpu(cpu) {
648 cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
649 kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
650 if (!kmem_cache || cpuc->lbr_xsave)
651 continue;
652
653 cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
654 GFP_KERNEL | __GFP_ZERO,
655 cpu_to_node(cpu));
656 }
657 }
658
659 void intel_pmu_lbr_del(struct perf_event *event)
660 {
661 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
662
663 if (!x86_pmu.lbr_nr)
664 return;
665
666 if (branch_user_callstack(cpuc->br_sel) &&
667 event->pmu_ctx->task_ctx_data)
668 task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
669
670 if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
671 cpuc->lbr_select = 0;
672
673 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
674 cpuc->lbr_pebs_users--;
675 cpuc->lbr_users--;
676 WARN_ON_ONCE(cpuc->lbr_users < 0);
677 WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
678 perf_sched_cb_dec(event->pmu);
679
680 /*
681 * The logged occurrences information is only valid for the
682 * current LBR group. If another LBR group is scheduled in
683 * later, the information from the stale LBRs will be wrongly
684 * interpreted. Reset the LBRs here.
685 *
686 * Only clear once for a branch counter group with the leader
687 * event. Because
688 * - Cannot simply reset the LBRs with the !cpuc->lbr_users.
689 * Because it's possible that the last LBR user is not in a
690 * branch counter group, e.g., a branch_counters group +
691 * several normal LBR events.
692 * - The LBR reset can be done with any one of the events in a
693 * branch counter group, since they are always scheduled together.
694 * It's easy to force the leader event to be an LBR event.
695 */ 696 if (is_branch_counters_group(event) && event == event->group_leader) 697 intel_pmu_lbr_reset(); 698 } 699 700 static inline bool vlbr_exclude_host(void) 701 { 702 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 703 704 return test_bit(INTEL_PMC_IDX_FIXED_VLBR, 705 (unsigned long *)&cpuc->intel_ctrl_guest_mask); 706 } 707 708 void intel_pmu_lbr_enable_all(bool pmi) 709 { 710 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 711 712 if (cpuc->lbr_users && !vlbr_exclude_host()) 713 __intel_pmu_lbr_enable(pmi); 714 } 715 716 void intel_pmu_lbr_disable_all(void) 717 { 718 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 719 720 if (cpuc->lbr_users && !vlbr_exclude_host()) { 721 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 722 return __intel_pmu_arch_lbr_disable(); 723 724 __intel_pmu_lbr_disable(); 725 } 726 } 727 728 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) 729 { 730 unsigned long mask = x86_pmu.lbr_nr - 1; 731 struct perf_branch_entry *br = cpuc->lbr_entries; 732 u64 tos = intel_pmu_lbr_tos(); 733 int i; 734 735 for (i = 0; i < x86_pmu.lbr_nr; i++) { 736 unsigned long lbr_idx = (tos - i) & mask; 737 union { 738 struct { 739 u32 from; 740 u32 to; 741 }; 742 u64 lbr; 743 } msr_lastbranch; 744 745 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 746 747 perf_clear_branch_entry_bitfields(br); 748 749 br->from = msr_lastbranch.from; 750 br->to = msr_lastbranch.to; 751 br++; 752 } 753 cpuc->lbr_stack.nr = i; 754 cpuc->lbr_stack.hw_idx = tos; 755 } 756 757 /* 758 * Due to lack of segmentation in Linux the effective address (offset) 759 * is the same as the linear address, allowing us to merge the LIP and EIP 760 * LBR formats. 761 */ 762 void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) 763 { 764 bool need_info = false, call_stack = false; 765 unsigned long mask = x86_pmu.lbr_nr - 1; 766 struct perf_branch_entry *br = cpuc->lbr_entries; 767 u64 tos = intel_pmu_lbr_tos(); 768 int i; 769 int out = 0; 770 int num = x86_pmu.lbr_nr; 771 772 if (cpuc->lbr_sel) { 773 need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); 774 if (cpuc->lbr_sel->config & LBR_CALL_STACK) 775 call_stack = true; 776 } 777 778 for (i = 0; i < num; i++) { 779 unsigned long lbr_idx = (tos - i) & mask; 780 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; 781 u16 cycles = 0; 782 783 from = rdlbr_from(lbr_idx, NULL); 784 to = rdlbr_to(lbr_idx, NULL); 785 786 /* 787 * Read LBR call stack entries 788 * until invalid entry (0s) is detected. 789 */ 790 if (call_stack && !from) 791 break; 792 793 if (x86_pmu.lbr_has_info) { 794 if (need_info) { 795 u64 info; 796 797 info = rdlbr_info(lbr_idx, NULL); 798 mis = !!(info & LBR_INFO_MISPRED); 799 pred = !mis; 800 cycles = (info & LBR_INFO_CYCLES); 801 if (x86_pmu.lbr_has_tsx) { 802 in_tx = !!(info & LBR_INFO_IN_TX); 803 abort = !!(info & LBR_INFO_ABORT); 804 } 805 } 806 } else { 807 int skip = 0; 808 809 if (x86_pmu.lbr_from_flags) { 810 mis = !!(from & LBR_FROM_FLAG_MISPRED); 811 pred = !mis; 812 skip = 1; 813 } 814 if (x86_pmu.lbr_has_tsx) { 815 in_tx = !!(from & LBR_FROM_FLAG_IN_TX); 816 abort = !!(from & LBR_FROM_FLAG_ABORT); 817 skip = 3; 818 } 819 from = (u64)((((s64)from) << skip) >> skip); 820 821 if (x86_pmu.lbr_to_cycles) { 822 cycles = ((to >> 48) & LBR_INFO_CYCLES); 823 to = (u64)((((s64)to) << 16) >> 16); 824 } 825 } 826 827 /* 828 * Some CPUs report duplicated abort records, 829 * with the second entry not having an abort bit set. 830 * Skip them here. 
This loop runs backwards,
831 * so we need to undo the previous record.
832 * If the abort just happened outside the window
833 * the extra entry cannot be removed.
834 */
835 if (abort && x86_pmu.lbr_double_abort && out > 0)
836 out--;
837
838 perf_clear_branch_entry_bitfields(br+out);
839 br[out].from = from;
840 br[out].to = to;
841 br[out].mispred = mis;
842 br[out].predicted = pred;
843 br[out].in_tx = in_tx;
844 br[out].abort = abort;
845 br[out].cycles = cycles;
846 out++;
847 }
848 cpuc->lbr_stack.nr = out;
849 cpuc->lbr_stack.hw_idx = tos;
850 }
851
852 static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
853 static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
854 static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
855
856 static __always_inline int get_lbr_br_type(u64 info)
857 {
858 int type = 0;
859
860 if (static_branch_likely(&x86_lbr_type))
861 type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
862
863 return type;
864 }
865
866 static __always_inline bool get_lbr_mispred(u64 info)
867 {
868 bool mispred = 0;
869
870 if (static_branch_likely(&x86_lbr_mispred))
871 mispred = !!(info & LBR_INFO_MISPRED);
872
873 return mispred;
874 }
875
876 static __always_inline u16 get_lbr_cycles(u64 info)
877 {
878 u16 cycles = info & LBR_INFO_CYCLES;
879
880 if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
881 (!static_branch_likely(&x86_lbr_cycles) ||
882 !(info & LBR_INFO_CYC_CNT_VALID)))
883 cycles = 0;
884
885 return cycles;
886 }
887
888 static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
889
890 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
891 struct lbr_entry *entries)
892 {
893 struct perf_branch_entry *e;
894 struct lbr_entry *lbr;
895 u64 from, to, info;
896 int i;
897
898 for (i = 0; i < x86_pmu.lbr_nr; i++) {
899 lbr = entries ? &entries[i] : NULL;
900 e = &cpuc->lbr_entries[i];
901
902 from = rdlbr_from(i, lbr);
903 /*
904 * Read LBR entries until invalid entry (0s) is detected.
905 */
906 if (!from)
907 break;
908
909 to = rdlbr_to(i, lbr);
910 info = rdlbr_info(i, lbr);
911
912 perf_clear_branch_entry_bitfields(e);
913
914 e->from = from;
915 e->to = to;
916 e->mispred = get_lbr_mispred(info);
917 e->predicted = !e->mispred;
918 e->in_tx = !!(info & LBR_INFO_IN_TX);
919 e->abort = !!(info & LBR_INFO_ABORT);
920 e->cycles = get_lbr_cycles(info);
921 e->type = get_lbr_br_type(info);
922
923 /*
924 * Leverage the reserved field of cpuc->lbr_entries[i] to
925 * temporarily store the branch counters information.
926 * The later code will decide what content can be disclosed
927 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
928 */
929 e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
930 }
931
932 cpuc->lbr_stack.nr = i;
933 }
934
935 /*
936 * The enabled order may be different from the counter order.
937 * Update the lbr_counters with the enabled order.
938 */ 939 static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc, 940 struct perf_event *event) 941 { 942 int i, j, pos = 0, order[X86_PMC_IDX_MAX]; 943 struct perf_event *leader, *sibling; 944 u64 src, dst, cnt; 945 946 leader = event->group_leader; 947 if (branch_sample_counters(leader)) 948 order[pos++] = leader->hw.idx; 949 950 for_each_sibling_event(sibling, leader) { 951 if (!branch_sample_counters(sibling)) 952 continue; 953 order[pos++] = sibling->hw.idx; 954 } 955 956 WARN_ON_ONCE(!pos); 957 958 for (i = 0; i < cpuc->lbr_stack.nr; i++) { 959 src = cpuc->lbr_entries[i].reserved; 960 dst = 0; 961 for (j = 0; j < pos; j++) { 962 cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK; 963 dst |= cnt << j * LBR_INFO_BR_CNTR_BITS; 964 } 965 cpuc->lbr_counters[i] = dst; 966 cpuc->lbr_entries[i].reserved = 0; 967 } 968 } 969 970 void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, 971 struct cpu_hw_events *cpuc, 972 struct perf_event *event) 973 { 974 if (is_branch_counters_group(event)) { 975 intel_pmu_lbr_counters_reorder(cpuc, event); 976 perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters); 977 return; 978 } 979 980 perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); 981 } 982 983 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) 984 { 985 intel_pmu_store_lbr(cpuc, NULL); 986 } 987 988 static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) 989 { 990 struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; 991 992 if (!xsave) { 993 intel_pmu_store_lbr(cpuc, NULL); 994 return; 995 } 996 xsaves(&xsave->xsave, XFEATURE_MASK_LBR); 997 998 intel_pmu_store_lbr(cpuc, xsave->lbr.entries); 999 } 1000 1001 void intel_pmu_lbr_read(void) 1002 { 1003 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1004 1005 /* 1006 * Don't read when all LBRs users are using adaptive PEBS. 1007 * 1008 * This could be smarter and actually check the event, 1009 * but this simple approach seems to work for now. 
1010 */ 1011 if (!cpuc->lbr_users || vlbr_exclude_host() || 1012 cpuc->lbr_users == cpuc->lbr_pebs_users) 1013 return; 1014 1015 x86_pmu.lbr_read(cpuc); 1016 1017 intel_pmu_lbr_filter(cpuc); 1018 } 1019 1020 /* 1021 * SW filter is used: 1022 * - in case there is no HW filter 1023 * - in case the HW filter has errata or limitations 1024 */ 1025 static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 1026 { 1027 u64 br_type = event->attr.branch_sample_type; 1028 int mask = 0; 1029 1030 if (br_type & PERF_SAMPLE_BRANCH_USER) 1031 mask |= X86_BR_USER; 1032 1033 if (br_type & PERF_SAMPLE_BRANCH_KERNEL) 1034 mask |= X86_BR_KERNEL; 1035 1036 /* we ignore BRANCH_HV here */ 1037 1038 if (br_type & PERF_SAMPLE_BRANCH_ANY) 1039 mask |= X86_BR_ANY; 1040 1041 if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) 1042 mask |= X86_BR_ANY_CALL; 1043 1044 if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) 1045 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; 1046 1047 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 1048 mask |= X86_BR_IND_CALL; 1049 1050 if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) 1051 mask |= X86_BR_ABORT; 1052 1053 if (br_type & PERF_SAMPLE_BRANCH_IN_TX) 1054 mask |= X86_BR_IN_TX; 1055 1056 if (br_type & PERF_SAMPLE_BRANCH_NO_TX) 1057 mask |= X86_BR_NO_TX; 1058 1059 if (br_type & PERF_SAMPLE_BRANCH_COND) 1060 mask |= X86_BR_JCC; 1061 1062 if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { 1063 if (!x86_pmu_has_lbr_callstack()) 1064 return -EOPNOTSUPP; 1065 if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) 1066 return -EINVAL; 1067 mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | 1068 X86_BR_CALL_STACK; 1069 } 1070 1071 if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) 1072 mask |= X86_BR_IND_JMP; 1073 1074 if (br_type & PERF_SAMPLE_BRANCH_CALL) 1075 mask |= X86_BR_CALL | X86_BR_ZERO_CALL; 1076 1077 if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) 1078 mask |= X86_BR_TYPE_SAVE; 1079 1080 /* 1081 * stash actual user request into reg, it may 1082 * be used by fixup code for some CPU 1083 */ 1084 event->hw.branch_reg.reg = mask; 1085 return 0; 1086 } 1087 1088 /* 1089 * setup the HW LBR filter 1090 * Used only when available, may not be enough to disambiguate 1091 * all branches, may need the help of the SW filter 1092 */ 1093 static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) 1094 { 1095 struct hw_perf_event_extra *reg; 1096 u64 br_type = event->attr.branch_sample_type; 1097 u64 mask = 0, v; 1098 int i; 1099 1100 for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { 1101 if (!(br_type & (1ULL << i))) 1102 continue; 1103 1104 v = x86_pmu.lbr_sel_map[i]; 1105 if (v == LBR_NOT_SUPP) 1106 return -EOPNOTSUPP; 1107 1108 if (v != LBR_IGN) 1109 mask |= v; 1110 } 1111 1112 reg = &event->hw.branch_reg; 1113 reg->idx = EXTRA_REG_LBR; 1114 1115 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { 1116 reg->config = mask; 1117 1118 /* 1119 * The Arch LBR HW can retrieve the common branch types 1120 * from the LBR_INFO. It doesn't require the high overhead 1121 * SW disassemble. 1122 * Enable the branch type by default for the Arch LBR. 1123 */ 1124 reg->reg |= X86_BR_TYPE_SAVE; 1125 return 0; 1126 } 1127 1128 /* 1129 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate 1130 * in suppress mode. So LBR_SELECT should be set to 1131 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) 1132 * But the 10th bit LBR_CALL_STACK does not operate 1133 * in suppress mode. 
1134 */
1135 reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
1136
1137 if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
1138 (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1139 x86_pmu.lbr_has_info)
1140 reg->config |= LBR_NO_INFO;
1141
1142 return 0;
1143 }
1144
1145 int intel_pmu_setup_lbr_filter(struct perf_event *event)
1146 {
1147 int ret = 0;
1148
1149 /*
1150 * no LBR on this PMU
1151 */
1152 if (!x86_pmu.lbr_nr)
1153 return -EOPNOTSUPP;
1154
1155 /*
1156 * setup SW LBR filter
1157 */
1158 ret = intel_pmu_setup_sw_lbr_filter(event);
1159 if (ret)
1160 return ret;
1161
1162 /*
1163 * setup HW LBR filter, if any
1164 */
1165 if (x86_pmu.lbr_sel_map)
1166 ret = intel_pmu_setup_hw_lbr_filter(event);
1167
1168 return ret;
1169 }
1170
1171 enum {
1172 ARCH_LBR_BR_TYPE_JCC = 0,
1173 ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
1174 ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2,
1175 ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3,
1176 ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4,
1177 ARCH_LBR_BR_TYPE_NEAR_RET = 5,
1178 ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET,
1179
1180 ARCH_LBR_BR_TYPE_MAP_MAX = 16,
1181 };
1182
1183 static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
1184 [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC,
1185 [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP,
1186 [ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP,
1187 [ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL,
1188 [ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL,
1189 [ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET,
1190 };
1191
1192 /*
1193 * implement actual branch filter based on user demand.
1194 * Hardware may not exactly satisfy that request, thus
1195 * we need to inspect opcodes. Mismatched branches are
1196 * discarded. Therefore, the number of branches returned
1197 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
1198 */
1199 static void
1200 intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1201 {
1202 u64 from, to;
1203 int br_sel = cpuc->br_sel;
1204 int i, j, type, to_plm;
1205 bool compress = false;
1206
1207 /* if sampling all branches, then nothing to filter */
1208 if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1209 ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1210 return;
1211
1212 for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1213
1214 from = cpuc->lbr_entries[i].from;
1215 to = cpuc->lbr_entries[i].to;
1216 type = cpuc->lbr_entries[i].type;
1217
1218 /*
1219 * Parse the branch type recorded in LBR_x_INFO MSR.
1220 * Doesn't support OTHER_BRANCH decoding for now.
1221 * The OTHER_BRANCH branch type still relies on software decoding.
1222 */
1223 if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
1224 type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
1225 to_plm = kernel_ip(to) ?
X86_BR_KERNEL : X86_BR_USER; 1226 type = arch_lbr_br_type_map[type] | to_plm; 1227 } else 1228 type = branch_type(from, to, cpuc->lbr_entries[i].abort); 1229 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { 1230 if (cpuc->lbr_entries[i].in_tx) 1231 type |= X86_BR_IN_TX; 1232 else 1233 type |= X86_BR_NO_TX; 1234 } 1235 1236 /* if type does not correspond, then discard */ 1237 if (type == X86_BR_NONE || (br_sel & type) != type) { 1238 cpuc->lbr_entries[i].from = 0; 1239 compress = true; 1240 } 1241 1242 if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) 1243 cpuc->lbr_entries[i].type = common_branch_type(type); 1244 } 1245 1246 if (!compress) 1247 return; 1248 1249 /* remove all entries with from=0 */ 1250 for (i = 0; i < cpuc->lbr_stack.nr; ) { 1251 if (!cpuc->lbr_entries[i].from) { 1252 j = i; 1253 while (++j < cpuc->lbr_stack.nr) { 1254 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; 1255 cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j]; 1256 } 1257 cpuc->lbr_stack.nr--; 1258 if (!cpuc->lbr_entries[i].from) 1259 continue; 1260 } 1261 i++; 1262 } 1263 } 1264 1265 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) 1266 { 1267 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1268 1269 /* Cannot get TOS for large PEBS and Arch LBR */ 1270 if (static_cpu_has(X86_FEATURE_ARCH_LBR) || 1271 (cpuc->n_pebs == cpuc->n_large_pebs)) 1272 cpuc->lbr_stack.hw_idx = -1ULL; 1273 else 1274 cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); 1275 1276 intel_pmu_store_lbr(cpuc, lbr); 1277 intel_pmu_lbr_filter(cpuc); 1278 } 1279 1280 /* 1281 * Map interface branch filters onto LBR filters 1282 */ 1283 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1284 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1285 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1286 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1287 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1288 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP 1289 | LBR_IND_JMP | LBR_FAR, 1290 /* 1291 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches 1292 */ 1293 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = 1294 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, 1295 /* 1296 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL 1297 */ 1298 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, 1299 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 1300 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 1301 }; 1302 1303 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1304 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1305 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1306 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1307 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1308 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 1309 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 1310 | LBR_FAR, 1311 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, 1312 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 1313 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 1314 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, 1315 }; 1316 1317 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1318 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1319 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1320 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1321 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1322 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 1323 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 1324 | LBR_FAR, 1325 
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1326 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1327 [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1328 | LBR_RETURN | LBR_CALL_STACK,
1329 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1330 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
1331 };
1332
1333 static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1334 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY,
1335 [PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER,
1336 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL,
1337 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1338 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN |
1339 ARCH_LBR_OTHER_BRANCH,
1340 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL |
1341 ARCH_LBR_IND_CALL |
1342 ARCH_LBR_OTHER_BRANCH,
1343 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL,
1344 [PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC,
1345 [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL |
1346 ARCH_LBR_IND_CALL |
1347 ARCH_LBR_RETURN |
1348 ARCH_LBR_CALL_STACK,
1349 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP,
1350 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL,
1351 };
1352
1353 /* core */
1354 void __init intel_pmu_lbr_init_core(void)
1355 {
1356 x86_pmu.lbr_nr = 4;
1357 x86_pmu.lbr_tos = MSR_LBR_TOS;
1358 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1359 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1360
1361 /*
1362 * SW branch filter usage:
1363 * - compensate for lack of HW filter
1364 */
1365 }
1366
1367 /* nehalem/westmere */
1368 void __init intel_pmu_lbr_init_nhm(void)
1369 {
1370 x86_pmu.lbr_nr = 16;
1371 x86_pmu.lbr_tos = MSR_LBR_TOS;
1372 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1373 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1374
1375 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1376 x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1377
1378 /*
1379 * SW branch filter usage:
1380 * - work around the LBR_SEL errata (see above)
1381 * - support syscall, sysret capture.
1382 * That requires LBR_FAR, but that means far
1383 * jmps need to be filtered out
1384 */
1385 }
1386
1387 /* sandy bridge */
1388 void __init intel_pmu_lbr_init_snb(void)
1389 {
1390 x86_pmu.lbr_nr = 16;
1391 x86_pmu.lbr_tos = MSR_LBR_TOS;
1392 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1393 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1394
1395 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1396 x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1397
1398 /*
1399 * SW branch filter usage:
1400 * - support syscall, sysret capture.
1401 * That requires LBR_FAR, but that means far
1402 * jmps need to be filtered out
1403 */
1404 }
1405
1406 static inline struct kmem_cache *
1407 create_lbr_kmem_cache(size_t size, size_t align)
1408 {
1409 return kmem_cache_create("x86_lbr", size, align, 0, NULL);
1410 }
1411
1412 /* haswell */
1413 void intel_pmu_lbr_init_hsw(void)
1414 {
1415 size_t size = sizeof(struct x86_perf_task_context);
1416
1417 x86_pmu.lbr_nr = 16;
1418 x86_pmu.lbr_tos = MSR_LBR_TOS;
1419 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1420 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1421
1422 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1423 x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1424
1425 x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1426 }
1427
1428 /* skylake */
1429 __init void intel_pmu_lbr_init_skl(void)
1430 {
1431 size_t size = sizeof(struct x86_perf_task_context);
1432
1433 x86_pmu.lbr_nr = 32;
1434 x86_pmu.lbr_tos = MSR_LBR_TOS;
1435 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1436 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1437 x86_pmu.lbr_info = MSR_LBR_INFO_0;
1438
1439 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1440 x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1441
1442 x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1443
1444 /*
1445 * SW branch filter usage:
1446 * - support syscall, sysret capture.
1447 * That requires LBR_FAR, but that means far
1448 * jmps need to be filtered out
1449 */
1450 }
1451
1452 /* atom */
1453 void __init intel_pmu_lbr_init_atom(void)
1454 {
1455 /*
1456 * only models starting at stepping 10 seem
1457 * to have an operational LBR which can freeze
1458 * on PMU interrupt
1459 */
1460 if (boot_cpu_data.x86_model == 28
1461 && boot_cpu_data.x86_stepping < 10) {
1462 pr_cont("LBR disabled due to erratum");
1463 return;
1464 }
1465
1466 x86_pmu.lbr_nr = 8;
1467 x86_pmu.lbr_tos = MSR_LBR_TOS;
1468 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1469 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1470
1471 /*
1472 * SW branch filter usage:
1473 * - compensate for lack of HW filter
1474 */
1475 }
1476
1477 /* slm */
1478 void __init intel_pmu_lbr_init_slm(void)
1479 {
1480 x86_pmu.lbr_nr = 8;
1481 x86_pmu.lbr_tos = MSR_LBR_TOS;
1482 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1483 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1484
1485 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1486 x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1487
1488 /*
1489 * SW branch filter usage:
1490 * - compensate for lack of HW filter
1491 */
1492 pr_cont("8-deep LBR, ");
1493 }
1494
1495 /* Knights Landing */
1496 void intel_pmu_lbr_init_knl(void)
1497 {
1498 x86_pmu.lbr_nr = 8;
1499 x86_pmu.lbr_tos = MSR_LBR_TOS;
1500 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1501 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1502
1503 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1504 x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1505
1506 /* Knights Landing does have MISPREDICT bit */
1507 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1508 x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1509 }
1510
1511 void intel_pmu_lbr_init(void)
1512 {
1513 switch (x86_pmu.intel_cap.lbr_format) {
1514 case LBR_FORMAT_EIP_FLAGS2:
1515 x86_pmu.lbr_has_tsx = 1;
1516 x86_pmu.lbr_from_flags = 1;
1517 if (lbr_from_signext_quirk_needed())
1518 static_branch_enable(&lbr_from_quirk_key);
1519 break;
1520
1521 case LBR_FORMAT_EIP_FLAGS:
1522 x86_pmu.lbr_from_flags = 1;
1523 break;
1524
1525 case LBR_FORMAT_INFO:
1526 x86_pmu.lbr_has_tsx = 1;
1527 fallthrough;
1528 case LBR_FORMAT_INFO2:
1529 x86_pmu.lbr_has_info = 1;
1530 break;
1531
1532 case LBR_FORMAT_TIME:
1533 x86_pmu.lbr_from_flags = 1;
1534 x86_pmu.lbr_to_cycles = 1; 1535 break; 1536 } 1537 1538 if (x86_pmu.lbr_has_info) { 1539 /* 1540 * Only used in combination with baseline pebs. 1541 */ 1542 static_branch_enable(&x86_lbr_mispred); 1543 static_branch_enable(&x86_lbr_cycles); 1544 } 1545 } 1546 1547 /* 1548 * LBR state size is variable based on the max number of registers. 1549 * This calculates the expected state size, which should match 1550 * what the hardware enumerates for the size of XFEATURE_LBR. 1551 */ 1552 static inline unsigned int get_lbr_state_size(void) 1553 { 1554 return sizeof(struct arch_lbr_state) + 1555 x86_pmu.lbr_nr * sizeof(struct lbr_entry); 1556 } 1557 1558 static bool is_arch_lbr_xsave_available(void) 1559 { 1560 if (!boot_cpu_has(X86_FEATURE_XSAVES)) 1561 return false; 1562 1563 /* 1564 * Check the LBR state with the corresponding software structure. 1565 * Disable LBR XSAVES support if the size doesn't match. 1566 */ 1567 if (xfeature_size(XFEATURE_LBR) == 0) 1568 return false; 1569 1570 if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) 1571 return false; 1572 1573 return true; 1574 } 1575 1576 void __init intel_pmu_arch_lbr_init(void) 1577 { 1578 struct pmu *pmu = x86_get_pmu(smp_processor_id()); 1579 union cpuid28_eax eax; 1580 union cpuid28_ebx ebx; 1581 union cpuid28_ecx ecx; 1582 unsigned int unused_edx; 1583 bool arch_lbr_xsave; 1584 size_t size; 1585 u64 lbr_nr; 1586 1587 /* Arch LBR Capabilities */ 1588 cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx); 1589 1590 lbr_nr = fls(eax.split.lbr_depth_mask) * 8; 1591 if (!lbr_nr) 1592 goto clear_arch_lbr; 1593 1594 /* Apply the max depth of Arch LBR */ 1595 if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) 1596 goto clear_arch_lbr; 1597 1598 x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; 1599 x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset; 1600 x86_pmu.lbr_lip = eax.split.lbr_lip; 1601 x86_pmu.lbr_cpl = ebx.split.lbr_cpl; 1602 x86_pmu.lbr_filter = ebx.split.lbr_filter; 1603 x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack; 1604 x86_pmu.lbr_mispred = ecx.split.lbr_mispred; 1605 x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; 1606 x86_pmu.lbr_br_type = ecx.split.lbr_br_type; 1607 x86_pmu.lbr_counters = ecx.split.lbr_counters; 1608 x86_pmu.lbr_nr = lbr_nr; 1609 1610 if (!!x86_pmu.lbr_counters) 1611 x86_pmu.flags |= PMU_FL_BR_CNTR; 1612 1613 if (x86_pmu.lbr_mispred) 1614 static_branch_enable(&x86_lbr_mispred); 1615 if (x86_pmu.lbr_timed_lbr) 1616 static_branch_enable(&x86_lbr_cycles); 1617 if (x86_pmu.lbr_br_type) 1618 static_branch_enable(&x86_lbr_type); 1619 1620 arch_lbr_xsave = is_arch_lbr_xsave_available(); 1621 if (arch_lbr_xsave) { 1622 size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) + 1623 get_lbr_state_size(); 1624 pmu->task_ctx_cache = create_lbr_kmem_cache(size, 1625 XSAVE_ALIGNMENT); 1626 } 1627 1628 if (!pmu->task_ctx_cache) { 1629 arch_lbr_xsave = false; 1630 1631 size = sizeof(struct x86_perf_task_context_arch_lbr) + 1632 lbr_nr * sizeof(struct lbr_entry); 1633 pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0); 1634 } 1635 1636 x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; 1637 x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; 1638 x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0; 1639 1640 /* LBR callstack requires both CPL and Branch Filtering support */ 1641 if (!x86_pmu.lbr_cpl || 1642 !x86_pmu.lbr_filter || 1643 !x86_pmu.lbr_call_stack) 1644 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP; 1645 1646 if (!x86_pmu.lbr_cpl) { 1647 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP; 
1648 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP; 1649 } else if (!x86_pmu.lbr_filter) { 1650 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP; 1651 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP; 1652 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP; 1653 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP; 1654 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP; 1655 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP; 1656 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP; 1657 } 1658 1659 x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK; 1660 x86_pmu.lbr_ctl_map = arch_lbr_ctl_map; 1661 1662 if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter) 1663 x86_pmu.lbr_ctl_map = NULL; 1664 1665 x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset; 1666 if (arch_lbr_xsave) { 1667 x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves; 1668 x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors; 1669 x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave; 1670 pr_cont("XSAVE "); 1671 } else { 1672 x86_pmu.lbr_save = intel_pmu_arch_lbr_save; 1673 x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore; 1674 x86_pmu.lbr_read = intel_pmu_arch_lbr_read; 1675 } 1676 1677 pr_cont("Architectural LBR, "); 1678 1679 return; 1680 1681 clear_arch_lbr: 1682 setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); 1683 } 1684 1685 /** 1686 * x86_perf_get_lbr - get the LBR records information 1687 * 1688 * @lbr: the caller's memory to store the LBR records information 1689 */ 1690 void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) 1691 { 1692 lbr->nr = x86_pmu.lbr_nr; 1693 lbr->from = x86_pmu.lbr_from; 1694 lbr->to = x86_pmu.lbr_to; 1695 lbr->info = x86_pmu.lbr_info; 1696 } 1697 EXPORT_SYMBOL_GPL(x86_perf_get_lbr); 1698 1699 struct event_constraint vlbr_constraint = 1700 __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), 1701 FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT); 1702
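/*
 * Illustrative usage sketch (not part of this file, and not kernel code):
 * the PERF_SAMPLE_BRANCH_* bits translated above by
 * intel_pmu_setup_sw_lbr_filter()/intel_pmu_setup_hw_lbr_filter() are
 * requested from user space through perf_event_open(). A minimal
 * user-space sketch, assuming standard uapi headers and a hypothetical
 * helper name:
 *
 *	#include <linux/perf_event.h>
 *	#include <sys/syscall.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int open_branch_sampling_event(void)
 *	{
 *		struct perf_event_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.size = sizeof(attr);
 *		attr.type = PERF_TYPE_HARDWARE;
 *		attr.config = PERF_COUNT_HW_CPU_CYCLES;
 *		attr.sample_period = 100000;
 *		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
 *		attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
 *					  PERF_SAMPLE_BRANCH_USER;
 *		attr.exclude_kernel = 1;
 *
 *		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *	}
 *
 * On this PMU such a request is mapped onto LBR_SELECT/LBR_CTL by
 * intel_pmu_setup_lbr_filter(), with any remaining filtering done in
 * software by intel_pmu_lbr_filter().
 */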