#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "../perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO,
};

static enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY		 \
	(X86_BR_CALL		|\
	 X86_BR_RET		|\
	 X86_BR_SYSCALL		|\
	 X86_BR_SYSRET		|\
	 X86_BR_INT		|\
	 X86_BR_IRET		|\
	 X86_BR_JCC		|\
	 X86_BR_JMP		|\
	 X86_BR_IRQ		|\
	 X86_BR_ABORT		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_IND_JMP		|\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes nearly impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;
	debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (!(lbr_select & LBR_CALL_STACK))
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};

static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0 ||
	    task_ctx->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = task_ctx->tos;
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	wrmsrl(x86_pmu.lbr_tos, tos);
	task_ctx->lbr_stack_state = LBR_NONE;
}

static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->tos = tos;
	task_ctx->lbr_stack_state = LBR_VALID;
}
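
/*
 * Note on the (tos - i) & mask indexing used above and in the read
 * functions below: the LBR stack is a ring buffer of x86_pmu.lbr_nr
 * entries, and lbr_nr is a power of two on every CPU initialized in
 * this file, so mask == lbr_nr - 1 reduces the index modulo the ring
 * size. For example, with lbr_nr == 16, tos == 2 and i == 3, the index
 * is (2 - 3) & 0xf == 0xf, i.e. the walk wraps around to the last
 * entry of the ring.
 */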

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in) {
			__intel_pmu_lbr_restore(task_ctx);
			cpuc->lbr_context = ctx;
		} else {
			__intel_pmu_lbr_save(task_ctx);
		}
		return;
	}

	/*
	 * When sampling the branch stack in system-wide mode, it may be
	 * necessary to flush the stack on context switch. This happens
	 * when the branch stack does not tag its entries with the pid
	 * of the current task. Otherwise it becomes impossible to
	 * associate a branch entry with a task. This ambiguity is more
	 * likely to appear when the branch stack supports priv level
	 * filtering and the user sets it to monitor only at the user
	 * level (which could be a useful measurement in system-wide
	 * mode). In that case, the risk is high of having a branch
	 * stack with branches from multiple tasks.
	 */
	if (sched_in) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = ctx;
	}
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}
	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
	    event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users++;
	}

	cpuc->lbr_users++;
	perf_sched_cb_inc(event->ctx->pmu);
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
	    event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users--;
	}

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);

	if (cpuc->enabled && !cpuc->lbr_users) {
		__intel_pmu_lbr_disable();
		/* avoid stale pointer */
		cpuc->lbr_context = NULL;
	}
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			num = tos;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_INFO && need_info) {
			u64 info;

			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}
		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
		/* strip the flag bits and sign-extend the remaining address bits */
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from	 = from;
		cpuc->lbr_entries[out].to	 = to;
		cpuc->lbr_entries[out].mispred	 = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 */
	reg->config = mask ^ x86_pmu.lbr_sel_mask;

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
		reg->config |= LBR_NO_INFO;

	return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

/*
 * Return the type of control flow change at address "from"; the
 * instruction is not necessarily a branch (e.g., in case of an interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if the LBR did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data. This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
		/* fall through: non-zero-length calls are regular calls */
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}
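
/*
 * Example of the classification above: a user-space "call *%rax" decodes
 * to opcode 0xff with a ModRM reg field of 2, so branch_type() returns
 * X86_BR_IND_CALL | X86_BR_USER (the priv level is always taken from the
 * branch target).
 */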

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in the PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};
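
/*
 * Worked example of the mapping above: PERF_SAMPLE_BRANCH_USER |
 * PERF_SAMPLE_BRANCH_ANY_CALL on SNB selects LBR_USER | LBR_REL_CALL |
 * LBR_IND_CALL | LBR_FAR == 0x11a. intel_pmu_setup_hw_lbr_filter() then
 * programs reg->config = 0x11a ^ LBR_SEL_MASK == 0x0e5, so MSR_LBR_SELECT
 * suppresses ring 0, conditionals, near returns and relative/indirect
 * jumps, leaving user-mode calls and far branches to be captured.
 */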

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	pr_cont("16-deep LBR, ");
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("32-deep LBR, ");
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to	   = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	pr_cont("8-deep LBR, ");
}