#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "../perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO,
};

static enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

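/*
 * Illustrative note (the values below are made up, not read from
 * hardware): on LBR formats without LBR_INFO, the MISPRED/IN_TX/ABORT
 * flags above live in the top bits of MSR_LBR_FROM and are stripped in
 * intel_pmu_lbr_read_64() by an arithmetic shift, which also restores
 * the canonical sign-extended address. With the TSX format (skip = 3):
 *
 *	from = 0x9fffffff81001234;	MISPRED set, bits 60:0 = address
 *	(u64)((((s64)from) << 3) >> 3) == 0xffffffff81001234
 */
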
/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY       \
	(X86_BR_CALL    |\
	 X86_BR_RET     |\
	 X86_BR_SYSCALL |\
	 X86_BR_SYSRET  |\
	 X86_BR_INT     |\
	 X86_BR_IRET    |\
	 X86_BR_JCC     |\
	 X86_BR_JMP     |\
	 X86_BR_IRQ     |\
	 X86_BR_ABORT   |\
	 X86_BR_IND_CALL |\
	 X86_BR_IND_JMP  |\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;
	debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (!(lbr_select & LBR_CALL_STACK))
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};

static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0 ||
	    task_ctx->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = task_ctx->tos;
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	wrmsrl(x86_pmu.lbr_tos, tos);
	task_ctx->lbr_stack_state = LBR_NONE;
}

static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->tos = tos;
	task_ctx->lbr_stack_state = LBR_VALID;
}

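/*
 * Illustration of the ring-buffer indexing used by the save/restore
 * helpers above and by the read paths below (numbers are example
 * values only): lbr_nr is a power of two, so (tos - i) & (lbr_nr - 1)
 * walks backwards from the top of stack and wraps around when i
 * exceeds tos. With lbr_nr = 16 and tos = 2:
 *
 *	i = 0  ->  idx =  2
 *	i = 1  ->  idx =  1
 *	i = 2  ->  idx =  0
 *	i = 3  ->  idx = (2 - 3) & 15 = 15
 */
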
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in) {
			__intel_pmu_lbr_restore(task_ctx);
			cpuc->lbr_context = ctx;
		} else {
			__intel_pmu_lbr_save(task_ctx);
		}
		return;
	}

	/*
	 * When sampling the branch stack in system-wide, it may be
	 * necessary to flush the stack on context switch. This happens
	 * when the branch stack does not tag its entries with the pid
	 * of the current task. Otherwise it becomes impossible to
	 * associate a branch entry with a task. This ambiguity is more
	 * likely to appear when the branch stack supports priv level
	 * filtering and the user sets it to monitor only at the user
	 * level (which could be a useful measurement in system-wide
	 * mode). In that case, the risk is high of having a branch
	 * stack with branches from multiple tasks.
	 */
	if (sched_in) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = ctx;
	}
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}
	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
					event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users++;
	}

	cpuc->lbr_users++;
	perf_sched_cb_inc(event->ctx->pmu);
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
					event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users--;
	}

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);

	if (cpuc->enabled && !cpuc->lbr_users) {
		__intel_pmu_lbr_disable();
		/* avoid stale pointer */
		cpuc->lbr_context = NULL;
	}
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */

static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			num = tos;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_INFO && need_info) {
			u64 info;

			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}
		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from	 = from;
		cpuc->lbr_entries[out].to	 = to;
		cpuc->lbr_entries[out].mispred	 = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
		reg->config |= LBR_NO_INFO;

	return 0;
}

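/*
 * Worked example of the suppress-mode encoding above (the request is
 * hypothetical, chosen only for illustration): if the map lookups
 * produced mask = LBR_KERNEL | LBR_JCC (kernel-only conditional
 * branches), then with lbr_sel_mask == LBR_SEL_MASK:
 *
 *	reg->config = 0x005 ^ (0x3ff & ~0x200) = 0x1fa
 *
 * i.e. the LBR_KERNEL and LBR_JCC bits end up cleared (captured), the
 * other filter bits in the low 9 bits are set (suppressed), and the
 * LBR_CALL_STACK bit is passed through unmodified.
 */
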
int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

/*
 * return the type of control flow change at address "from";
 * the instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if lbr did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data. This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
		/* fall through: non-zero displacement is a regular call */
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transition) may
	 * occur on any instructions. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}

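/*
 * Classification examples (addresses and opcode bytes are illustrative
 * only): a user-space "e8 00 00 00 00" (near call with a zero
 * displacement, the classic get-IP idiom in 32-bit PIC code) with a
 * user target yields X86_BR_ZERO_CALL | X86_BR_USER, while an entry
 * whose "from" is an ordinary user instruction and whose "to" is a
 * kernel address decodes to X86_BR_NONE in the opcode switch and is
 * then reported as X86_BR_IRQ | X86_BR_KERNEL by the priv-level check.
 */
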
/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	pr_cont("16-deep LBR, ");
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("32-deep LBR, ");
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to	   = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	pr_cont("8-deep LBR, ");
}