1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * AMD specific. Provide textual annotation for IBS raw sample data. 4 */ 5 6 #include <unistd.h> 7 #include <stdio.h> 8 #include <string.h> 9 #include <inttypes.h> 10 11 #include <linux/string.h> 12 #include "../../arch/x86/include/asm/amd/ibs.h" 13 14 #include "debug.h" 15 #include "session.h" 16 #include "evlist.h" 17 #include "sample-raw.h" 18 #include "util/sample.h" 19 20 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; 21 static bool zen4_ibs_extensions; 22 static bool ldlat_cap; 23 static bool dtlb_pgsize_cap; 24 static bool rmtsocket_cap; 25 static bool strmst_cap; 26 27 /* 28 * Status fields of IBS_FETCH_CTL and IBS_FETCH_CTL_EXT are valid only if 29 * IBS_FETCH_CTL[PhyAddrValid] is set. 30 */ 31 static int fetch_ctl_depends_on_phy_addr_valid(void) 32 { 33 static int depends = -1; /* -1: Don't know, 1: Yes, 0: No */ 34 35 if (depends != -1) 36 return depends; 37 38 depends = 0; 39 if (cpu_family > 0x1a || 40 (cpu_family == 0x1a && ( 41 (cpu_model >= 0x50 && cpu_model <= 0x5f) || 42 (cpu_model >= 0x80 && cpu_model <= 0xaf) || 43 (cpu_model >= 0xc0 && cpu_model <= 0xcf)))) { 44 depends = 1; 45 } 46 47 return depends; 48 } 49 50 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) 51 { 52 const char * const ic_miss_strs[] = { 53 " IcMiss 0", 54 " IcMiss 1", 55 }; 56 const char * const l1tlb_pgsz_strs[] = { 57 " L1TlbPgSz 4KB", 58 " L1TlbPgSz 2MB", 59 " L1TlbPgSz 1GB", 60 " L1TlbPgSz RESERVED" 61 }; 62 const char * const l1tlb_pgsz_strs_erratum1347[] = { 63 " L1TlbPgSz 4KB", 64 " L1TlbPgSz 16KB", 65 " L1TlbPgSz 2MB", 66 " L1TlbPgSz 1GB" 67 }; 68 const char *ic_miss_str = NULL; 69 const char *l1tlb_pgsz_str = NULL; 70 char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = ""; 71 char l3_miss_only_str[sizeof(" L3MissOnly _")] = ""; 72 73 if (fetch_ctl_depends_on_phy_addr_valid() && !reg.phy_addr_valid) { 74 snprintf(l3_miss_only_str, sizeof(l3_miss_only_str), 75 " L3MissOnly %d", reg.l3_miss_only); 76 77 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d En %d Val %d Comp %d " 78 "PhyAddrValid 0 RandEn %d%s\n", reg.val, reg.fetch_maxcnt << 4, 79 reg.fetch_cnt << 4, reg.fetch_en, reg.fetch_val, reg.fetch_comp, 80 reg.rand_en, l3_miss_only_str); 81 return; 82 } 83 84 if (cpu_family == 0x19 && cpu_model < 0x10) { 85 /* 86 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] 87 * Erratum #1347 workaround is to use table provided in erratum 88 */ 89 if (reg.phy_addr_valid) 90 l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; 91 } else { 92 if (reg.phy_addr_valid) 93 l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; 94 ic_miss_str = ic_miss_strs[reg.ic_miss]; 95 } 96 97 if (zen4_ibs_extensions) { 98 snprintf(l3_miss_str, sizeof(l3_miss_str), 99 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d", 100 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); 101 } 102 103 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " 104 "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n", 105 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, 106 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", 107 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, 108 reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "", 109 l3_miss_str); 110 } 111 112 static void pr_ic_ibs_extd_ctl(union ibs_fetch_ctl fetch_ctl, union ic_ibs_extd_ctl reg) 113 { 114 if (fetch_ctl_depends_on_phy_addr_valid() && !fetch_ctl.phy_addr_valid) 115 return; 116 117 printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); 118 } 119 120 static void pr_ibs_op_ctl(union ibs_op_ctl reg) 121 { 122 char l3_miss_only[sizeof(" L3MissOnly _")] = ""; 123 char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = ""; 124 125 if (zen4_ibs_extensions) 126 snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); 127 128 if (ldlat_cap) { 129 snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d", 130 reg.ldlat_thrsh, reg.ldlat_en); 131 } 132 133 printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n", 134 reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, 135 reg.op_en, reg.op_val, reg.cnt_ctl, 136 reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat); 137 } 138 139 static void pr_ibs_op_data(union ibs_op_data reg) 140 { 141 printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " 142 " RipInvalid %d BrnFuse %d Microcode %d\n", 143 reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, 144 reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", 145 reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", 146 reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", 147 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); 148 } 149 150 static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) 151 { 152 static const char * const data_src_str[] = { 153 "", 154 " DataSrc 1=Local L3 or other L1/L2 in CCX", 155 " DataSrc 2=Another CCX cache in the same NUMA node", 156 " DataSrc 3=DRAM", 157 " DataSrc 4=(reserved)", 158 " DataSrc 5=Another CCX cache in a different NUMA node", 159 " DataSrc 6=Long-latency DIMM", 160 " DataSrc 7=MMIO/Config/PCI/APIC", 161 " DataSrc 8=Extension Memory", 162 " DataSrc 9=(reserved)", 163 " DataSrc 10=(reserved)", 164 " DataSrc 11=(reserved)", 165 " DataSrc 12=Coherent Memory of a different processor type", 166 /* 13 to 31 are reserved. Avoid printing them. */ 167 }; 168 int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; 169 char rmtsocket[sizeof("RmtSocket _ ")] = ""; 170 char strmst[sizeof("StrmSt _ ")] = ""; 171 172 if (rmtsocket_cap) 173 snprintf(rmtsocket, sizeof(rmtsocket), "RmtSocket %d ", reg.rmt_socket); 174 if (strmst_cap) 175 snprintf(strmst, sizeof(strmst), "StrmSt %d ", reg.strm_st); 176 177 printf("ibs_op_data2:\t%016llx %s%s%sRmtNode %d%s\n", reg.val, 178 rmtsocket, strmst, 179 (data_src == 1 || data_src == 2 || data_src == 5) ? 180 (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", 181 reg.rmt_node, 182 data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : ""); 183 } 184 185 static void pr_ibs_op_data2_default(union ibs_op_data2 reg) 186 { 187 static const char * const data_src_str[] = { 188 "", 189 " DataSrc 1=(reserved)", 190 " DataSrc 2=Local node cache", 191 " DataSrc 3=DRAM", 192 " DataSrc 4=Remote node cache", 193 " DataSrc 5=(reserved)", 194 " DataSrc 6=(reserved)", 195 " DataSrc 7=Other" 196 }; 197 char rmtsocket[sizeof("RmtSocket _ ")] = ""; 198 char strmst[sizeof("StrmSt _ ")] = ""; 199 200 if (rmtsocket_cap) 201 snprintf(rmtsocket, sizeof(rmtsocket), "RmtSocket %d ", reg.rmt_socket); 202 if (strmst_cap) 203 snprintf(strmst, sizeof(strmst), "StrmSt %d ", reg.strm_st); 204 205 printf("ibs_op_data2:\t%016llx %s%s%sRmtNode %d%s\n", reg.val, 206 rmtsocket, strmst, 207 reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " 208 : "CacheHitSt 0=M-state ") : "", 209 reg.rmt_node, data_src_str[reg.data_src_lo]); 210 } 211 212 static void pr_ibs_op_data2(union ibs_op_data2 reg) 213 { 214 if (zen4_ibs_extensions) 215 return pr_ibs_op_data2_extended(reg); 216 pr_ibs_op_data2_default(reg); 217 } 218 219 static void pr_ibs_op_data3(union ibs_op_data3 reg) 220 { 221 static const char * const dc_page_sizes[] = { 222 " 4K", 223 " 2M", 224 " 1G", 225 " ??", 226 }; 227 char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; 228 char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = ""; 229 char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = ""; 230 char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; 231 char tlb_refill_lat_str[sizeof(" TlbRefillLat _____")] = ""; 232 char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = ""; 233 char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = ""; 234 char dc_page_size_str[sizeof(" DcPageSize ____")] = ""; 235 char l2_miss_str[sizeof(" L2Miss _")] = ""; 236 237 /* 238 * Erratum #1293 239 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set 240 */ 241 if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { 242 snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); 243 snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), 244 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); 245 } 246 247 if (reg.op_mem_width) 248 snprintf(op_mem_width_str, sizeof(op_mem_width_str), 249 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); 250 251 if (dtlb_pgsize_cap) { 252 if (reg.dc_phy_addr_valid) { 253 int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m; 254 255 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), 256 " DcL1TlbMiss %d DcL2TlbMiss %d", 257 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); 258 snprintf(dc_page_size_str, sizeof(dc_page_size_str), 259 " DcPageSize %4s", dc_page_sizes[idx]); 260 } 261 } else { 262 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), 263 " DcL1TlbMiss %d DcL2TlbMiss %d", 264 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); 265 snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str), 266 " DcL1TlbHit2M %d DcL1TlbHit1G %d", 267 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g); 268 snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str), 269 " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m); 270 snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str), 271 " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g); 272 } 273 274 /* Use !zen4_ibs_extensions as a proxy for Zen3 and earlier */ 275 if (!zen4_ibs_extensions || reg.dc_phy_addr_valid) { 276 snprintf(tlb_refill_lat_str, sizeof(tlb_refill_lat_str), 277 " TlbRefillLat %5d", reg.tlb_refill_lat); 278 } 279 280 printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d " 281 "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d " 282 "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s " 283 "DcMissLat %5d%s\n", 284 reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str, 285 dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str, 286 dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc, 287 reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc, 288 reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str, 289 l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str, 290 reg.dc_miss_lat, tlb_refill_lat_str); 291 } 292 293 /* 294 * IBS Op/Execution MSRs always saved, in order, are: 295 * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, 296 * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP 297 */ 298 static void amd_dump_ibs_op(struct perf_sample *sample) 299 { 300 struct perf_ibs_data *data = sample->raw_data; 301 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 302 __u64 *rip = (__u64 *)op_ctl + 1; 303 union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); 304 union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); 305 306 pr_ibs_op_ctl(*op_ctl); 307 if (!op_data->op_rip_invalid) 308 printf("IbsOpRip:\t%016llx\n", *rip); 309 pr_ibs_op_data(*op_data); 310 /* 311 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set 312 */ 313 if (!(cpu_family == 0x19 && cpu_model < 0x10 && 314 (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) 315 pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); 316 pr_ibs_op_data3(*op_data3); 317 if (op_data3->dc_lin_addr_valid) 318 printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); 319 320 /* Use !zen4_ibs_extensions as a proxy for Zen3 and earlier */ 321 if (op_data3->dc_phy_addr_valid && *(rip + 5) && 322 (!zen4_ibs_extensions || op_data3->dc_lin_addr_valid)) { 323 printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); 324 } 325 if (op_data->op_brn_ret && *(rip + 6)) 326 printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); 327 } 328 329 /* 330 * IBS Fetch MSRs always saved, in order, are: 331 * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL 332 */ 333 static void amd_dump_ibs_fetch(struct perf_sample *sample) 334 { 335 struct perf_ibs_data *data = sample->raw_data; 336 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 337 __u64 *addr = (__u64 *)fetch_ctl + 1; 338 union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; 339 340 pr_ibs_fetch_ctl(*fetch_ctl); 341 printf("IbsFetchLinAd:\t%016llx\n", *addr++); 342 if (fetch_ctl->phy_addr_valid) 343 printf("IbsFetchPhysAd:\t%016llx\n", *addr); 344 pr_ic_ibs_extd_ctl(*fetch_ctl, *extd_ctl); 345 } 346 347 /* 348 * Test for enable and valid bits in captured control MSRs. 349 */ 350 static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) 351 { 352 struct perf_ibs_data *data = sample->raw_data; 353 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 354 355 if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) 356 return true; 357 358 return false; 359 } 360 361 static bool is_valid_ibs_op_sample(struct perf_sample *sample) 362 { 363 struct perf_ibs_data *data = sample->raw_data; 364 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 365 366 if (op_ctl->op_en && op_ctl->op_val) 367 return true; 368 369 return false; 370 } 371 372 /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events 373 * and if the event was triggered by IBS, display its raw data with decoded text. 374 * The function is only invoked when the dump flag -D is set. 375 */ 376 void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, 377 struct perf_sample *sample) 378 { 379 struct evsel *evsel; 380 381 if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) 382 return; 383 384 evsel = evlist__event2evsel(evlist, event); 385 if (!evsel) 386 return; 387 388 if (evsel->core.attr.type == ibs_fetch_type) { 389 if (!is_valid_ibs_fetch_sample(sample)) { 390 pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); 391 return; 392 } 393 amd_dump_ibs_fetch(sample); 394 } else if (evsel->core.attr.type == ibs_op_type) { 395 if (!is_valid_ibs_op_sample(sample)) { 396 pr_debug("Invalid raw IBS Op MSR data encountered\n"); 397 return; 398 } 399 amd_dump_ibs_op(sample); 400 } 401 } 402 403 static void parse_cpuid(struct perf_env *env) 404 { 405 const char *cpuid; 406 int ret; 407 408 cpuid = perf_env__cpuid(env); 409 /* 410 * cpuid = "AuthenticAMD,family,model,stepping" 411 */ 412 ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); 413 if (ret != 2) 414 pr_debug("problem parsing cpuid\n"); 415 } 416 417 /* 418 * Find and assign the type number used for ibs_op or ibs_fetch samples. 419 * Device names can be large - we are only interested in the first 9 characters, 420 * to match "ibs_fetch". 421 */ 422 bool evlist__has_amd_ibs(struct evlist *evlist) 423 { 424 struct perf_env *env = perf_session__env(evlist->session); 425 int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); 426 const char *pmu_mapping = perf_env__pmu_mappings(env); 427 char name[sizeof("ibs_fetch")]; 428 u32 type; 429 430 while (nr_pmu_mappings--) { 431 ret = sscanf(pmu_mapping, "%u:%9s", &type, name); 432 if (ret == 2) { 433 if (strstarts(name, "ibs_op")) 434 ibs_op_type = type; 435 else if (strstarts(name, "ibs_fetch")) 436 ibs_fetch_type = type; 437 } 438 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; 439 } 440 441 if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) 442 zen4_ibs_extensions = 1; 443 444 if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat")) 445 ldlat_cap = 1; 446 447 if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize")) 448 dtlb_pgsize_cap = 1; 449 450 if (perf_env__find_pmu_cap(env, "ibs_op", "rmtsocket")) 451 rmtsocket_cap = 1; 452 453 if (perf_env__find_pmu_cap(env, "ibs_op", "strmst")) 454 strmst_cap = 1; 455 456 if (ibs_fetch_type || ibs_op_type) { 457 if (!cpu_family) 458 parse_cpuid(env); 459 return true; 460 } 461 462 return false; 463 } 464