1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2026, Ali Jose Mashtizadeh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* Support for the AMD IBS */ 30 31 #include <sys/param.h> 32 #include <sys/lock.h> 33 #include <sys/malloc.h> 34 #include <sys/mutex.h> 35 #include <sys/pcpu.h> 36 #include <sys/pmc.h> 37 #include <sys/pmckern.h> 38 #include <sys/pmclog.h> 39 #include <sys/smp.h> 40 #include <sys/systm.h> 41 42 #include <machine/cpu.h> 43 #include <machine/cpufunc.h> 44 #include <machine/md_var.h> 45 #include <machine/specialreg.h> 46 47 #define IBS_STOP_ITER 50 /* Stopping iterations */ 48 49 /* AMD IBS PMCs */ 50 struct ibs_descr { 51 struct pmc_descr pm_descr; /* "base class" */ 52 }; 53 54 /* 55 * Globals 56 */ 57 static uint64_t ibs_features; 58 59 /* 60 * Per-processor information 61 */ 62 #define IBS_CPU_RUNNING 1 63 #define IBS_CPU_STOPPING 2 64 #define IBS_CPU_STOPPED 3 65 66 struct ibs_cpu { 67 int pc_status; 68 struct pmc_hw pc_ibspmcs[IBS_NPMCS]; 69 }; 70 static struct ibs_cpu **ibs_pcpu; 71 72 /* 73 * Read a PMC value from the MSR. 74 */ 75 static int 76 ibs_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) 77 { 78 79 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 80 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 81 KASSERT(ri >= 0 && ri < IBS_NPMCS, 82 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 83 KASSERT(ibs_pcpu[cpu], 84 ("[ibs,%d] null per-cpu, cpu %d", __LINE__, cpu)); 85 86 /* read the IBS ctl */ 87 switch (ri) { 88 case IBS_PMC_FETCH: 89 *v = rdmsr(IBS_FETCH_CTL); 90 break; 91 case IBS_PMC_OP: 92 *v = rdmsr(IBS_OP_CTL); 93 break; 94 } 95 96 PMCDBG2(MDP, REA, 2, "ibs-read id=%d -> %jd", ri, *v); 97 98 return (0); 99 } 100 101 /* 102 * Write a PMC MSR. 103 */ 104 static int 105 ibs_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) 106 { 107 108 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 109 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 110 KASSERT(ri >= 0 && ri < IBS_NPMCS, 111 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 112 113 PMCDBG3(MDP, WRI, 1, "ibs-write cpu=%d ri=%d v=%jx", cpu, ri, v); 114 115 return (0); 116 } 117 118 /* 119 * Configure hardware PMC according to the configuration recorded in 'pm'. 120 */ 121 static int 122 ibs_config_pmc(int cpu, int ri, struct pmc *pm) 123 { 124 struct pmc_hw *phw; 125 126 PMCDBG3(MDP, CFG, 1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); 127 128 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 129 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 130 KASSERT(ri >= 0 && ri < IBS_NPMCS, 131 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 132 133 phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri]; 134 135 KASSERT(pm == NULL || phw->phw_pmc == NULL, 136 ("[ibs,%d] pm=%p phw->pm=%p hwpmc not unconfigured", 137 __LINE__, pm, phw->phw_pmc)); 138 139 phw->phw_pmc = pm; 140 141 return (0); 142 } 143 144 /* 145 * Retrieve a configured PMC pointer from hardware state. 146 */ 147 static int 148 ibs_get_config(int cpu, int ri, struct pmc **ppm) 149 { 150 151 *ppm = ibs_pcpu[cpu]->pc_ibspmcs[ri].phw_pmc; 152 153 return (0); 154 } 155 156 /* 157 * Check if a given PMC allocation is feasible. 158 */ 159 static int 160 ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm, 161 const struct pmc_op_pmcallocate *a) 162 { 163 uint64_t caps, config; 164 165 KASSERT(ri >= 0 && ri < IBS_NPMCS, 166 ("[ibs,%d] illegal row index %d", __LINE__, ri)); 167 168 /* check class match */ 169 if (a->pm_class != PMC_CLASS_IBS) 170 return (EINVAL); 171 if (a->pm_md.pm_ibs.ibs_type != ri) 172 return (EINVAL); 173 174 caps = pm->pm_caps; 175 176 PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps); 177 178 if ((caps & PMC_CAP_SYSTEM) == 0) 179 return (EINVAL); 180 181 config = a->pm_md.pm_ibs.ibs_ctl; 182 pm->pm_md.pm_ibs.ibs_ctl = config; 183 184 PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%x", ri, config); 185 186 return (0); 187 } 188 189 /* 190 * Release machine dependent state associated with a PMC. This is a 191 * no-op on this architecture. 192 */ 193 static int 194 ibs_release_pmc(int cpu, int ri, struct pmc *pmc __unused) 195 { 196 struct pmc_hw *phw __diagused; 197 198 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 199 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 200 KASSERT(ri >= 0 && ri < IBS_NPMCS, 201 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 202 203 PMCDBG1(MDP, ALL, 1, "ibs-release ri=%d", ri); 204 205 phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri]; 206 207 KASSERT(phw->phw_pmc == NULL, 208 ("[ibs,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); 209 210 return (0); 211 } 212 213 /* 214 * Start a PMC. 215 */ 216 static int 217 ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm) 218 { 219 uint64_t config; 220 221 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 222 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 223 KASSERT(ri >= 0 && ri < IBS_NPMCS, 224 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 225 226 PMCDBG2(MDP, STA, 1, "ibs-start cpu=%d ri=%d", cpu, ri); 227 228 /* 229 * This is used to handle spurious NMIs. All that matters is that it 230 * is not in the stopping state. 231 */ 232 atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_RUNNING); 233 234 /* 235 * Turn on the ENABLE bit. Zeroing out the control register eliminates 236 * stale valid bits from spurious NMIs and it resets the counter. 237 */ 238 switch (ri) { 239 case IBS_PMC_FETCH: 240 wrmsr(IBS_FETCH_CTL, 0); 241 config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE; 242 wrmsr(IBS_FETCH_CTL, config); 243 break; 244 case IBS_PMC_OP: 245 wrmsr(IBS_OP_CTL, 0); 246 config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE; 247 wrmsr(IBS_OP_CTL, config); 248 break; 249 } 250 251 return (0); 252 } 253 254 /* 255 * Stop a PMC. 256 */ 257 static int 258 ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm) 259 { 260 int i; 261 uint64_t config; 262 263 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 264 ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); 265 KASSERT(ri >= 0 && ri < IBS_NPMCS, 266 ("[ibs,%d] illegal row-index %d", __LINE__, ri)); 267 268 PMCDBG1(MDP, STO, 1, "ibs-stop ri=%d", ri); 269 270 /* 271 * Turn off the ENABLE bit, but unfortunately there are a few quirks 272 * that generate excess NMIs. Workaround #420 in the Revision Guide 273 * for AMD Family 10h Processors 41322 Rev. 3.92 March 2012. requires 274 * that we clear the count before clearing enable. 275 * 276 * Even after clearing the counter spurious NMIs are still possible so 277 * we use a per-CPU atomic variable to notify the interrupt handler we 278 * are stopping and discard spurious NMIs. We then retry clearing the 279 * control register for 50us. This gives us enough time and ensures 280 * that the valid bit is not accidently stuck after a spurious NMI. 281 */ 282 config = pm->pm_md.pm_ibs.ibs_ctl; 283 284 atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING); 285 286 switch (ri) { 287 case IBS_PMC_FETCH: 288 wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK); 289 DELAY(1); 290 config &= ~IBS_FETCH_CTL_ENABLE; 291 wrmsr(IBS_FETCH_CTL, config); 292 break; 293 case IBS_PMC_OP: 294 wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK); 295 DELAY(1); 296 config &= ~IBS_OP_CTL_ENABLE; 297 wrmsr(IBS_OP_CTL, config); 298 break; 299 } 300 301 for (i = 0; i < IBS_STOP_ITER; i++) { 302 DELAY(1); 303 304 switch (ri) { 305 case IBS_PMC_FETCH: 306 wrmsr(IBS_FETCH_CTL, 0); 307 break; 308 case IBS_PMC_OP: 309 wrmsr(IBS_OP_CTL, 0); 310 break; 311 } 312 } 313 314 atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPED); 315 316 return (0); 317 } 318 319 static void 320 pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config) 321 { 322 struct pmc_multipart mpd; 323 324 if (pm == NULL) 325 return; 326 327 if (pm->pm_state != PMC_STATE_RUNNING) 328 return; 329 330 memset(&mpd, 0, sizeof(mpd)); 331 332 mpd.pl_type = PMC_CC_MULTIPART_IBS_FETCH; 333 mpd.pl_length = 4; 334 mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL] = config; 335 if (ibs_features) { 336 mpd.pl_mpdata[PMC_MPIDX_FETCH_EXTCTL] = rdmsr(IBS_FETCH_EXTCTL); 337 } 338 mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL] = config; 339 mpd.pl_mpdata[PMC_MPIDX_FETCH_LINADDR] = rdmsr(IBS_FETCH_LINADDR); 340 if ((config & IBS_FETCH_CTL_PHYSADDRVALID) != 0) { 341 mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] = 342 rdmsr(IBS_FETCH_PHYSADDR); 343 } 344 345 pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd); 346 } 347 348 static void 349 pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config) 350 { 351 struct pmc_multipart mpd; 352 353 if (pm == NULL) 354 return; 355 356 if (pm->pm_state != PMC_STATE_RUNNING) 357 return; 358 359 memset(&mpd, 0, sizeof(mpd)); 360 361 mpd.pl_type = PMC_CC_MULTIPART_IBS_OP; 362 mpd.pl_length = 8; 363 mpd.pl_mpdata[PMC_MPIDX_OP_CTL] = config; 364 mpd.pl_mpdata[PMC_MPIDX_OP_RIP] = rdmsr(IBS_OP_RIP); 365 mpd.pl_mpdata[PMC_MPIDX_OP_DATA] = rdmsr(IBS_OP_DATA); 366 mpd.pl_mpdata[PMC_MPIDX_OP_DATA2] = rdmsr(IBS_OP_DATA2); 367 mpd.pl_mpdata[PMC_MPIDX_OP_DATA3] = rdmsr(IBS_OP_DATA3); 368 mpd.pl_mpdata[PMC_MPIDX_OP_DC_LINADDR] = rdmsr(IBS_OP_DC_LINADDR); 369 mpd.pl_mpdata[PMC_MPIDX_OP_DC_PHYSADDR] = rdmsr(IBS_OP_DC_PHYSADDR); 370 371 pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd); 372 373 wrmsr(IBS_OP_CTL, pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE); 374 } 375 376 /* 377 * Interrupt handler. This function needs to return '1' if the 378 * interrupt was this CPU's PMCs or '0' otherwise. It is not allowed 379 * to sleep or do anything a 'fast' interrupt handler is not allowed 380 * to do. 381 */ 382 int 383 pmc_ibs_intr(struct trapframe *tf) 384 { 385 struct ibs_cpu *pac; 386 struct pmc *pm; 387 int retval, cpu; 388 uint64_t config; 389 390 cpu = curcpu; 391 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 392 ("[ibs,%d] out of range CPU %d", __LINE__, cpu)); 393 394 PMCDBG3(MDP, INT, 1, "cpu=%d tf=%p um=%d", cpu, tf, TRAPF_USERMODE(tf)); 395 396 retval = 0; 397 398 pac = ibs_pcpu[cpu]; 399 400 config = rdmsr(IBS_FETCH_CTL); 401 if ((config & IBS_FETCH_CTL_VALID) != 0) { 402 pm = pac->pc_ibspmcs[IBS_PMC_FETCH].phw_pmc; 403 404 retval = 1; 405 406 pmc_ibs_process_fetch(pm, tf, config); 407 } 408 409 config = rdmsr(IBS_OP_CTL); 410 if ((retval == 0) && ((config & IBS_OP_CTL_VALID) != 0)) { 411 pm = pac->pc_ibspmcs[IBS_PMC_OP].phw_pmc; 412 413 retval = 1; 414 415 pmc_ibs_process_op(pm, tf, config); 416 } 417 418 if (retval == 0) { 419 // Lets check for a stray NMI when stopping 420 if (atomic_load_int(&pac->pc_status) == IBS_CPU_STOPPING) { 421 return (1); 422 } 423 } 424 425 426 if (retval) 427 counter_u64_add(pmc_stats.pm_intr_processed, 1); 428 else 429 counter_u64_add(pmc_stats.pm_intr_ignored, 1); 430 431 PMCDBG1(MDP, INT, 2, "retval=%d", retval); 432 433 return (retval); 434 } 435 436 /* 437 * Describe a PMC. 438 */ 439 static int 440 ibs_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) 441 { 442 struct pmc_hw *phw; 443 444 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 445 ("[ibs,%d] illegal CPU %d", __LINE__, cpu)); 446 KASSERT(ri >= 0 && ri < IBS_NPMCS, 447 ("[ibs,%d] row-index %d out of range", __LINE__, ri)); 448 449 phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri]; 450 451 if (ri == IBS_PMC_FETCH) { 452 strlcpy(pi->pm_name, "IBS-FETCH", sizeof(pi->pm_name)); 453 pi->pm_class = PMC_CLASS_IBS; 454 pi->pm_enabled = true; 455 *ppmc = phw->phw_pmc; 456 } else { 457 strlcpy(pi->pm_name, "IBS-OP", sizeof(pi->pm_name)); 458 pi->pm_class = PMC_CLASS_IBS; 459 pi->pm_enabled = true; 460 *ppmc = phw->phw_pmc; 461 } 462 463 return (0); 464 } 465 466 /* 467 * Processor-dependent initialization. 468 */ 469 static int 470 ibs_pcpu_init(struct pmc_mdep *md, int cpu) 471 { 472 struct ibs_cpu *pac; 473 struct pmc_cpu *pc; 474 struct pmc_hw *phw; 475 int first_ri, n; 476 477 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 478 ("[ibs,%d] insane cpu number %d", __LINE__, cpu)); 479 480 PMCDBG1(MDP, INI, 1, "ibs-init cpu=%d", cpu); 481 482 ibs_pcpu[cpu] = pac = malloc(sizeof(struct ibs_cpu), M_PMC, 483 M_WAITOK | M_ZERO); 484 485 /* 486 * Set the content of the hardware descriptors to a known 487 * state and initialize pointers in the MI per-cpu descriptor. 488 */ 489 pc = pmc_pcpu[cpu]; 490 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri; 491 492 KASSERT(pc != NULL, ("[ibs,%d] NULL per-cpu pointer", __LINE__)); 493 494 for (n = 0, phw = pac->pc_ibspmcs; n < IBS_NPMCS; n++, phw++) { 495 phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | 496 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); 497 phw->phw_pmc = NULL; 498 pc->pc_hwpmcs[n + first_ri] = phw; 499 } 500 501 return (0); 502 } 503 504 /* 505 * Processor-dependent cleanup prior to the KLD being unloaded. 506 */ 507 static int 508 ibs_pcpu_fini(struct pmc_mdep *md, int cpu) 509 { 510 struct ibs_cpu *pac; 511 struct pmc_cpu *pc; 512 int first_ri, i; 513 514 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 515 ("[ibs,%d] insane cpu number (%d)", __LINE__, cpu)); 516 517 PMCDBG1(MDP, INI, 1, "ibs-cleanup cpu=%d", cpu); 518 519 /* 520 * Turn off IBS. 521 */ 522 wrmsr(IBS_FETCH_CTL, 0); 523 wrmsr(IBS_OP_CTL, 0); 524 525 /* 526 * Free up allocated space. 527 */ 528 if ((pac = ibs_pcpu[cpu]) == NULL) 529 return (0); 530 531 ibs_pcpu[cpu] = NULL; 532 533 pc = pmc_pcpu[cpu]; 534 KASSERT(pc != NULL, ("[ibs,%d] NULL per-cpu state", __LINE__)); 535 536 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri; 537 538 /* 539 * Reset pointers in the MI 'per-cpu' state. 540 */ 541 for (i = 0; i < IBS_NPMCS; i++) 542 pc->pc_hwpmcs[i + first_ri] = NULL; 543 544 free(pac, M_PMC); 545 546 return (0); 547 } 548 549 /* 550 * Initialize ourselves. 551 */ 552 int 553 pmc_ibs_initialize(struct pmc_mdep *pmc_mdep, int ncpus) 554 { 555 u_int regs[4]; 556 struct pmc_classdep *pcd; 557 558 /* 559 * Allocate space for pointers to PMC HW descriptors and for 560 * the MDEP structure used by MI code. 561 */ 562 ibs_pcpu = malloc(sizeof(struct ibs_cpu *) * pmc_cpu_max(), M_PMC, 563 M_WAITOK | M_ZERO); 564 565 /* Initialize AMD IBS handling. */ 566 pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS]; 567 568 pcd->pcd_caps = IBS_PMC_CAPS; 569 pcd->pcd_class = PMC_CLASS_IBS; 570 pcd->pcd_num = IBS_NPMCS; 571 pcd->pcd_ri = pmc_mdep->pmd_npmc; 572 pcd->pcd_width = 0; 573 574 pcd->pcd_allocate_pmc = ibs_allocate_pmc; 575 pcd->pcd_config_pmc = ibs_config_pmc; 576 pcd->pcd_describe = ibs_describe; 577 pcd->pcd_get_config = ibs_get_config; 578 pcd->pcd_pcpu_fini = ibs_pcpu_fini; 579 pcd->pcd_pcpu_init = ibs_pcpu_init; 580 pcd->pcd_release_pmc = ibs_release_pmc; 581 pcd->pcd_start_pmc = ibs_start_pmc; 582 pcd->pcd_stop_pmc = ibs_stop_pmc; 583 pcd->pcd_read_pmc = ibs_read_pmc; 584 pcd->pcd_write_pmc = ibs_write_pmc; 585 586 pmc_mdep->pmd_npmc += IBS_NPMCS; 587 588 if (cpu_exthigh >= CPUID_IBSID) { 589 do_cpuid(CPUID_IBSID, regs); 590 ibs_features = regs[0]; 591 } else { 592 ibs_features = 0; 593 } 594 595 PMCDBG0(MDP, INI, 0, "ibs-initialize"); 596 597 return (0); 598 } 599 600 /* 601 * Finalization code for AMD CPUs. 602 */ 603 void 604 pmc_ibs_finalize(struct pmc_mdep *md) 605 { 606 PMCDBG0(MDP, INI, 1, "ibs-finalize"); 607 608 for (int i = 0; i < pmc_cpu_max(); i++) 609 KASSERT(ibs_pcpu[i] == NULL, 610 ("[ibs,%d] non-null pcpu cpu %d", __LINE__, i)); 611 612 free(ibs_pcpu, M_PMC); 613 ibs_pcpu = NULL; 614 } 615