xref: /linux/tools/perf/util/amd-sample-raw.c (revision 390d5ea26622f794c2d29cefd5a01ef116b4fe1d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * AMD specific. Provide textual annotation for IBS raw sample data.
4  */
5 
6 #include <unistd.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <inttypes.h>
10 
11 #include <linux/string.h>
12 #include "../../arch/x86/include/asm/amd/ibs.h"
13 
14 #include "debug.h"
15 #include "session.h"
16 #include "evlist.h"
17 #include "sample-raw.h"
18 #include "util/sample.h"
19 
20 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
21 static bool zen4_ibs_extensions;
22 static bool ldlat_cap;
23 static bool dtlb_pgsize_cap;
24 static bool rmtsocket_cap;
25 static bool strmst_cap;
26 
27 /*
28  * Status fields of IBS_FETCH_CTL and IBS_FETCH_CTL_EXT are valid only if
29  * IBS_FETCH_CTL[PhyAddrValid] is set.
30  */
31 static int fetch_ctl_depends_on_phy_addr_valid(void)
32 {
33 	static int depends = -1; /* -1: Don't know, 1: Yes, 0: No */
34 
35 	if (depends != -1)
36 		return depends;
37 
38 	depends = 0;
39 	if (cpu_family > 0x1a ||
40 	    (cpu_family == 0x1a && (
41 	     (cpu_model >= 0x50 && cpu_model <= 0x5f) ||
42 	     (cpu_model >= 0x80 && cpu_model <= 0xaf) ||
43 	     (cpu_model >= 0xc0 && cpu_model <= 0xcf)))) {
44 		depends = 1;
45 	}
46 
47 	return depends;
48 }
49 
50 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
51 {
52 	const char * const ic_miss_strs[] = {
53 		" IcMiss 0",
54 		" IcMiss 1",
55 	};
56 	const char * const l1tlb_pgsz_strs[] = {
57 		" L1TlbPgSz 4KB",
58 		" L1TlbPgSz 2MB",
59 		" L1TlbPgSz 1GB",
60 		" L1TlbPgSz RESERVED"
61 	};
62 	const char * const l1tlb_pgsz_strs_erratum1347[] = {
63 		" L1TlbPgSz 4KB",
64 		" L1TlbPgSz 16KB",
65 		" L1TlbPgSz 2MB",
66 		" L1TlbPgSz 1GB"
67 	};
68 	const char *ic_miss_str = NULL;
69 	const char *l1tlb_pgsz_str = NULL;
70 	char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
71 	char l3_miss_only_str[sizeof(" L3MissOnly _")] = "";
72 
73 	if (fetch_ctl_depends_on_phy_addr_valid() && !reg.phy_addr_valid) {
74 		snprintf(l3_miss_only_str, sizeof(l3_miss_only_str),
75 			 " L3MissOnly %d", reg.l3_miss_only);
76 
77 		printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d En %d Val %d Comp %d "
78 		       "PhyAddrValid 0 RandEn %d%s\n", reg.val, reg.fetch_maxcnt << 4,
79 		       reg.fetch_cnt << 4, reg.fetch_en, reg.fetch_val, reg.fetch_comp,
80 		       reg.rand_en, l3_miss_only_str);
81 		return;
82 	}
83 
84 	if (cpu_family == 0x19 && cpu_model < 0x10) {
85 		/*
86 		 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
87 		 * Erratum #1347 workaround is to use table provided in erratum
88 		 */
89 		if (reg.phy_addr_valid)
90 			l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
91 	} else {
92 		if (reg.phy_addr_valid)
93 			l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
94 		ic_miss_str = ic_miss_strs[reg.ic_miss];
95 	}
96 
97 	if (zen4_ibs_extensions) {
98 		snprintf(l3_miss_str, sizeof(l3_miss_str),
99 			 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
100 			 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
101 	}
102 
103 	printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
104 		"PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
105 		reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
106 		reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
107 		reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
108 		reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
109 		l3_miss_str);
110 }
111 
112 static void pr_ic_ibs_extd_ctl(union ibs_fetch_ctl fetch_ctl, union ic_ibs_extd_ctl reg)
113 {
114 	if (fetch_ctl_depends_on_phy_addr_valid() && !fetch_ctl.phy_addr_valid)
115 		return;
116 
117 	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
118 }
119 
120 static void pr_ibs_op_ctl(union ibs_op_ctl reg)
121 {
122 	char l3_miss_only[sizeof(" L3MissOnly _")] = "";
123 	char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";
124 
125 	if (zen4_ibs_extensions)
126 		snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
127 
128 	if (ldlat_cap) {
129 		snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
130 			 reg.ldlat_thrsh, reg.ldlat_en);
131 	}
132 
133 	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
134 		reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
135 		reg.op_en, reg.op_val, reg.cnt_ctl,
136 		reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
137 }
138 
139 static void pr_ibs_op_data(union ibs_op_data reg)
140 {
141 	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
142 		" RipInvalid %d BrnFuse %d Microcode %d\n",
143 		reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
144 		reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
145 		reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
146 		reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
147 		reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
148 }
149 
150 static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
151 {
152 	static const char * const data_src_str[] = {
153 		"",
154 		" DataSrc 1=Local L3 or other L1/L2 in CCX",
155 		" DataSrc 2=Another CCX cache in the same NUMA node",
156 		" DataSrc 3=DRAM",
157 		" DataSrc 4=(reserved)",
158 		" DataSrc 5=Another CCX cache in a different NUMA node",
159 		" DataSrc 6=Long-latency DIMM",
160 		" DataSrc 7=MMIO/Config/PCI/APIC",
161 		" DataSrc 8=Extension Memory",
162 		" DataSrc 9=(reserved)",
163 		" DataSrc 10=(reserved)",
164 		" DataSrc 11=(reserved)",
165 		" DataSrc 12=Coherent Memory of a different processor type",
166 		/* 13 to 31 are reserved. Avoid printing them. */
167 	};
168 	int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
169 	char rmtsocket[sizeof("RmtSocket _ ")] = "";
170 	char strmst[sizeof("StrmSt _ ")] = "";
171 
172 	if (rmtsocket_cap)
173 		snprintf(rmtsocket, sizeof(rmtsocket), "RmtSocket %d ", reg.rmt_socket);
174 	if (strmst_cap)
175 		snprintf(strmst, sizeof(strmst), "StrmSt %d ", reg.strm_st);
176 
177 	printf("ibs_op_data2:\t%016llx %s%s%sRmtNode %d%s\n", reg.val,
178 		rmtsocket, strmst,
179 		(data_src == 1 || data_src == 2 || data_src == 5) ?
180 			(reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
181 		reg.rmt_node,
182 		data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
183 }
184 
185 static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
186 {
187 	static const char * const data_src_str[] = {
188 		"",
189 		" DataSrc 1=(reserved)",
190 		" DataSrc 2=Local node cache",
191 		" DataSrc 3=DRAM",
192 		" DataSrc 4=Remote node cache",
193 		" DataSrc 5=(reserved)",
194 		" DataSrc 6=(reserved)",
195 		" DataSrc 7=Other"
196 	};
197 	char rmtsocket[sizeof("RmtSocket _ ")] = "";
198 	char strmst[sizeof("StrmSt _ ")] = "";
199 
200 	if (rmtsocket_cap)
201 		snprintf(rmtsocket, sizeof(rmtsocket), "RmtSocket %d ", reg.rmt_socket);
202 	if (strmst_cap)
203 		snprintf(strmst, sizeof(strmst), "StrmSt %d ", reg.strm_st);
204 
205 	printf("ibs_op_data2:\t%016llx %s%s%sRmtNode %d%s\n", reg.val,
206 	       rmtsocket, strmst,
207 	       reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
208 						     : "CacheHitSt 0=M-state ") : "",
209 	       reg.rmt_node, data_src_str[reg.data_src_lo]);
210 }
211 
212 static void pr_ibs_op_data2(union ibs_op_data2 reg)
213 {
214 	if (zen4_ibs_extensions)
215 		return pr_ibs_op_data2_extended(reg);
216 	pr_ibs_op_data2_default(reg);
217 }
218 
219 static void pr_ibs_op_data3(union ibs_op_data3 reg)
220 {
221 	static const char * const dc_page_sizes[] = {
222 		"  4K",
223 		"  2M",
224 		"  1G",
225 		"  ??",
226 	};
227 	char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
228 	char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = "";
229 	char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = "";
230 	char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
231 	char tlb_refill_lat_str[sizeof(" TlbRefillLat _____")] = "";
232 	char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = "";
233 	char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = "";
234 	char dc_page_size_str[sizeof(" DcPageSize ____")] = "";
235 	char l2_miss_str[sizeof(" L2Miss _")] = "";
236 
237 	/*
238 	 * Erratum #1293
239 	 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
240 	 */
241 	if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
242 		snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
243 		snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
244 			 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
245 	}
246 
247 	if (reg.op_mem_width)
248 		snprintf(op_mem_width_str, sizeof(op_mem_width_str),
249 			 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
250 
251 	if (dtlb_pgsize_cap) {
252 		if (reg.dc_phy_addr_valid) {
253 			int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m;
254 
255 			snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
256 				 " DcL1TlbMiss %d DcL2TlbMiss %d",
257 				 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
258 			snprintf(dc_page_size_str, sizeof(dc_page_size_str),
259 				 " DcPageSize %4s", dc_page_sizes[idx]);
260 		}
261 	} else {
262 		snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
263 			 " DcL1TlbMiss %d DcL2TlbMiss %d",
264 			 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
265 		snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str),
266 			 " DcL1TlbHit2M %d DcL1TlbHit1G %d",
267 			 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g);
268 		snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str),
269 			 " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m);
270 		snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str),
271 			 " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g);
272 	}
273 
274 	/* Use !zen4_ibs_extensions as a proxy for Zen3 and earlier */
275 	if (!zen4_ibs_extensions || reg.dc_phy_addr_valid) {
276 		snprintf(tlb_refill_lat_str, sizeof(tlb_refill_lat_str),
277 			 " TlbRefillLat %5d", reg.tlb_refill_lat);
278 	}
279 
280 	printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d "
281 		"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d "
282 		"DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s "
283 		"DcMissLat %5d%s\n",
284 		reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str,
285 		dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str,
286 		dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc,
287 		reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc,
288 		reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str,
289 		l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str,
290 		reg.dc_miss_lat, tlb_refill_lat_str);
291 }
292 
293 /*
294  * IBS Op/Execution MSRs always saved, in order, are:
295  * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
296  * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
297  */
298 static void amd_dump_ibs_op(struct perf_sample *sample)
299 {
300 	struct perf_ibs_data *data = sample->raw_data;
301 	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
302 	__u64 *rip = (__u64 *)op_ctl + 1;
303 	union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
304 	union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
305 
306 	pr_ibs_op_ctl(*op_ctl);
307 	if (!op_data->op_rip_invalid)
308 		printf("IbsOpRip:\t%016llx\n", *rip);
309 	pr_ibs_op_data(*op_data);
310 	/*
311 	 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
312 	 */
313 	if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
314 	      (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
315 		pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
316 	pr_ibs_op_data3(*op_data3);
317 	if (op_data3->dc_lin_addr_valid)
318 		printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
319 
320 	/* Use !zen4_ibs_extensions as a proxy for Zen3 and earlier */
321 	if (op_data3->dc_phy_addr_valid && *(rip + 5) &&
322 	    (!zen4_ibs_extensions || op_data3->dc_lin_addr_valid)) {
323 		printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
324 	}
325 	if (op_data->op_brn_ret && *(rip + 6))
326 		printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
327 }
328 
329 /*
330  * IBS Fetch MSRs always saved, in order, are:
331  * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
332  */
333 static void amd_dump_ibs_fetch(struct perf_sample *sample)
334 {
335 	struct perf_ibs_data *data = sample->raw_data;
336 	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
337 	__u64 *addr = (__u64 *)fetch_ctl + 1;
338 	union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
339 
340 	pr_ibs_fetch_ctl(*fetch_ctl);
341 	printf("IbsFetchLinAd:\t%016llx\n", *addr++);
342 	if (fetch_ctl->phy_addr_valid)
343 		printf("IbsFetchPhysAd:\t%016llx\n", *addr);
344 	pr_ic_ibs_extd_ctl(*fetch_ctl, *extd_ctl);
345 }
346 
347 /*
348  * Test for enable and valid bits in captured control MSRs.
349  */
350 static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
351 {
352 	struct perf_ibs_data *data = sample->raw_data;
353 	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
354 
355 	if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
356 		return true;
357 
358 	return false;
359 }
360 
361 static bool is_valid_ibs_op_sample(struct perf_sample *sample)
362 {
363 	struct perf_ibs_data *data = sample->raw_data;
364 	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
365 
366 	if (op_ctl->op_en && op_ctl->op_val)
367 		return true;
368 
369 	return false;
370 }
371 
372 /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
373  * and if the event was triggered by IBS, display its raw data with decoded text.
374  * The function is only invoked when the dump flag -D is set.
375  */
376 void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
377 			    struct perf_sample *sample)
378 {
379 	struct evsel *evsel;
380 
381 	if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
382 		return;
383 
384 	evsel = evlist__event2evsel(evlist, event);
385 	if (!evsel)
386 		return;
387 
388 	if (evsel->core.attr.type == ibs_fetch_type) {
389 		if (!is_valid_ibs_fetch_sample(sample)) {
390 			pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
391 			return;
392 		}
393 		amd_dump_ibs_fetch(sample);
394 	} else if (evsel->core.attr.type == ibs_op_type) {
395 		if (!is_valid_ibs_op_sample(sample)) {
396 			pr_debug("Invalid raw IBS Op MSR data encountered\n");
397 			return;
398 		}
399 		amd_dump_ibs_op(sample);
400 	}
401 }
402 
403 static void parse_cpuid(struct perf_env *env)
404 {
405 	const char *cpuid;
406 	int ret;
407 
408 	cpuid = perf_env__cpuid(env);
409 	/*
410 	 * cpuid = "AuthenticAMD,family,model,stepping"
411 	 */
412 	ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
413 	if (ret != 2)
414 		pr_debug("problem parsing cpuid\n");
415 }
416 
417 /*
418  * Find and assign the type number used for ibs_op or ibs_fetch samples.
419  * Device names can be large - we are only interested in the first 9 characters,
420  * to match "ibs_fetch".
421  */
422 bool evlist__has_amd_ibs(struct evlist *evlist)
423 {
424 	struct perf_env *env = perf_session__env(evlist->session);
425 	int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
426 	const char *pmu_mapping = perf_env__pmu_mappings(env);
427 	char name[sizeof("ibs_fetch")];
428 	u32 type;
429 
430 	while (nr_pmu_mappings--) {
431 		ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
432 		if (ret == 2) {
433 			if (strstarts(name, "ibs_op"))
434 				ibs_op_type = type;
435 			else if (strstarts(name, "ibs_fetch"))
436 				ibs_fetch_type = type;
437 		}
438 		pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
439 	}
440 
441 	if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
442 		zen4_ibs_extensions = 1;
443 
444 	if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
445 		ldlat_cap = 1;
446 
447 	if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize"))
448 		dtlb_pgsize_cap = 1;
449 
450 	if (perf_env__find_pmu_cap(env, "ibs_op", "rmtsocket"))
451 		rmtsocket_cap = 1;
452 
453 	if (perf_env__find_pmu_cap(env, "ibs_op", "strmst"))
454 		strmst_cap = 1;
455 
456 	if (ibs_fetch_type || ibs_op_type) {
457 		if (!cpu_family)
458 			parse_cpuid(env);
459 		return true;
460 	}
461 
462 	return false;
463 }
464