// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <linux/kernel.h>
#include "cpumap.h"
#include "map_symbol.h"
#include "mem-events.h"
#include "mem-info.h"
#include "debug.h"
#include "evsel.h"
#include "symbol.h"
#include "pmu.h"
#include "pmus.h"

unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }

struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"%s/mem-loads,ldlat=%u/P",	"mem-loads",	true,	0),
	E("ldlat-stores",	"%s/mem-stores/P",		"mem-stores",	false,	0),
	E(NULL,			NULL,				NULL,		false,	0),
};
#undef E

bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };

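/**
 * perf_pmu__mem_events_ptr - Return the mem_events entry of a PMU
 * @pmu: The PMU to look up. NULL returns NULL.
 * @i: Index into the PMU's mem_events table, a PERF_MEM_EVENTS__* value.
 */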
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	return &pmu->mem_events[i];
}

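/* Iterate from @pmu (or from the start when NULL) to the next PMU with mem_events. */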
static struct perf_pmu *perf_pmus__scan_mem(struct perf_pmu *pmu)
{
	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
		if (pmu->mem_events)
			return pmu;
	}
	return NULL;
}

struct perf_pmu *perf_mem_events_find_pmu(void)
{
	/*
	 * The current perf mem doesn't support per-PMU configuration.
	 * The exact same configuration is applied to all the
	 * PMUs that support mem_events.
	 * Return the first PMU that supports mem_events.
	 *
	 * Note: The only case that may have multiple PMUs with
	 * mem_events support is Intel hybrid. The exact same
	 * mem_events are shared among those PMUs, so configuring
	 * only the first PMU is good enough.
	 */
	return perf_pmus__scan_mem(NULL);
}

/**
 * perf_pmu__mem_events_num_mem_pmus - Get the number of mem PMUs starting from the given pmu
 * @pmu: Start pmu. If it's NULL, search the entire PMU list.
 */
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
{
	int num = 0;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL)
		num++;

	return num;
}

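/*
 * Format the event string for mem event @i of @pmu into @buf. For
 * example, the default load event of a PMU named "cpu" is formatted as
 * "cpu/mem-loads,ldlat=30/P".
 */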
static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
					     char *buf, size_t buf_size)
{
	struct perf_mem_event *e;

	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	e = &pmu->mem_events[i];
	if (!e || !e->name)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
		if (e->ldlat) {
			if (!e->aux_event) {
				/* ARM and most Intel */
				scnprintf(buf, buf_size,
					  e->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			} else {
				/* Intel with mem-loads-aux event */
				scnprintf(buf, buf_size,
					  e->name, pmu->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			}
		} else {
			if (!e->aux_event) {
				/* AMD and POWER */
				scnprintf(buf, buf_size,
					  e->name, pmu->name);
			} else {
				return NULL;
			}
		}
		return buf;
	}

	if (i == PERF_MEM_EVENTS__STORE) {
		scnprintf(buf, buf_size,
			  e->name, pmu->name);
		return buf;
	}

	return NULL;
}

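/*
 * Return true when @leader is the PMU's auxiliary load event (such as
 * Intel's mem-loads-aux) rather than the load event itself.
 */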
bool is_mem_loads_aux_event(struct evsel *leader)
{
	struct perf_pmu *pmu = leader->pmu;
	struct perf_mem_event *e;

	if (!pmu || !pmu->mem_events)
		return false;

	e = &pmu->mem_events[PERF_MEM_EVENTS__LOAD];
	if (!e->aux_event)
		return false;

	return leader->core.attr.config == e->aux_event;
}

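/*
 * Parse a comma-separated list of mem event tags, e.g.
 * "ldlat-loads,ldlat-stores", and mark every matching entry in
 * perf_mem_record[]. Returns 0 when at least one tag matched.
 */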
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(str) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, str);

	tok = strtok_r(buf, ",", &saveptr);

	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				perf_mem_record[j] = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}

	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}

static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu,
					   struct perf_mem_event *e)
{
	char path[PATH_MAX];
	struct stat st;

	if (!e->event_name)
		return true;

	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name);

	return !stat(path, &st);
}

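/*
 * Probe sysfs for each of the PMU's mem events and mark those that
 * exist as supported. Returns -ENOENT when none of them is available.
 */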
static int __perf_pmu__mem_events_init(struct perf_pmu *pmu)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" will stay false.
		 */
		if (!e->tag)
			continue;

		e->supported |= perf_pmu__mem_events_supported(mnt, pmu, e);
		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}

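/* Initialize the mem_events of every PMU that declares them. */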
int perf_pmu__mem_events_init(void)
{
	struct perf_pmu *pmu = NULL;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		if (__perf_pmu__mem_events_init(pmu))
			return -ENOENT;
	}

	return 0;
}

void perf_pmu__mem_events_list(struct perf_pmu *pmu)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		char buf[128];
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		fprintf(stderr, "%-*s%-*s%s",
			e->tag ? 13 : 0,
			e->tag ? : "",
			e->tag && verbose > 0 ? 25 : 0,
			e->tag && verbose > 0
			? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))
			: "",
			e->supported ? ": available\n" : "");
	}
}

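/*
 * Append "-e <event>" argument pairs for every recorded mem event of
 * every mem PMU to @rec_argv. The formatted event names live in a
 * single allocation returned via @event_name_storage_out, which the
 * caller owns and must free.
 */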
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out)
{
	const char *mnt = sysfs__mount();
	struct perf_pmu *pmu = NULL;
	int i = *argv_nr;
	struct perf_cpu_map *cpu_map = NULL;
	size_t event_name_storage_size =
		perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128;
	size_t event_name_storage_remaining = event_name_storage_size;
	char *event_name_storage = malloc(event_name_storage_size);
	char *event_name_storage_ptr = event_name_storage;

	if (!event_name_storage)
		return -ENOMEM;

	*event_name_storage_out = NULL;
	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			const char *s;
			struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
			int ret;

			if (!perf_mem_record[j])
				continue;

			if (!e->supported) {
				char buf[128];

				pr_err("failed: event '%s' not supported\n",
					perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)));
				free(event_name_storage);
				return -1;
			}

			s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr,
						      event_name_storage_remaining);
			if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
				continue;

			rec_argv[i++] = "-e";
			rec_argv[i++] = event_name_storage_ptr;
			event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1;
			event_name_storage_ptr += strlen(event_name_storage_ptr) + 1;

			ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
			if (ret < 0) {
				free(event_name_storage);
				return ret;
			}
		}
	}

	if (cpu_map) {
		struct perf_cpu_map *online = cpu_map__online();

		if (!perf_cpu_map__equal(cpu_map, online)) {
			char buf[200];

			cpu_map__snprint(cpu_map, buf, sizeof(buf));
			pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
		}
		perf_cpu_map__put(online);
		perf_cpu_map__put(cpu_map);
	}

	*argv_nr = i;
	*event_name_storage_out = event_name_storage;
	return 0;
}

static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};

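/* Render the d-TLB bits of @mem_info as text, e.g. "L1 or L2 hit". */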
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB/MAB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};

static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_L1] = "L1",
	[PERF_MEM_LVLNUM_L2] = "L2",
	[PERF_MEM_LVLNUM_L3] = "L3",
	[PERF_MEM_LVLNUM_L4] = "L4",
	[PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB",
	[PERF_MEM_LVLNUM_MSC] = "Memory-side Cache",
	[PERF_MEM_LVLNUM_UNC] = "Uncached",
	[PERF_MEM_LVLNUM_CXL] = "CXL",
	[PERF_MEM_LVLNUM_IO] = "I/O",
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, "Remote" is prepended to represent
	 * "Remote core, same node" accesses, since the remote field
	 * must be set along with the mem_hops field.
	 */
	"core, same node",
	"node, same socket",
	"socket, same board",
	"board",
};

static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 op = PERF_MEM_OP_NA;
	int l;

	if (mem_info)
		op = mem_info__const_data_src(mem_info)->mem_op;

	if (op & PERF_MEM_OP_NA)
		l = scnprintf(out, sz, "N/A");
	else if (op & PERF_MEM_OP_LOAD)
		l = scnprintf(out, sz, "LOAD");
	else if (op & PERF_MEM_OP_STORE)
		l = scnprintf(out, sz, "STORE");
	else if (op & PERF_MEM_OP_PFETCH)
		l = scnprintf(out, sz, "PFETCH");
	else if (op & PERF_MEM_OP_EXEC)
		l = scnprintf(out, sz, "EXEC");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	union perf_mem_data_src data_src;
	int printed = 0;
	size_t l = 0;
	size_t i;
	int lvl;
	char hit_miss[5] = {0};

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (!mem_info)
		goto na;

	data_src = *mem_info__const_data_src(mem_info);

	if (data_src.mem_lvl & PERF_MEM_LVL_HIT)
		memcpy(hit_miss, "hit", 3);
	else if (data_src.mem_lvl & PERF_MEM_LVL_MISS)
		memcpy(hit_miss, "miss", 4);

	lvl = data_src.mem_lvl_num;
	if (lvl && lvl != PERF_MEM_LVLNUM_NA) {
		if (data_src.mem_remote) {
			strcat(out, "Remote ");
			l += 7;
		}

		if (data_src.mem_hops)
			l += scnprintf(out + l, sz - l, "%s ", mem_hops[data_src.mem_hops]);

		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "Unknown level %d", lvl);

		l += scnprintf(out + l, sz - l, " %s", hit_miss);
		return l;
	}

	lvl = data_src.mem_lvl;
	if (!lvl)
		goto na;

	lvl &= ~(PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT | PERF_MEM_LVL_MISS);
	if (!lvl)
		goto na;

	for (i = 0; lvl && i < ARRAY_SIZE(mem_lvl); i++, lvl >>= 1) {
		if (!(lvl & 0x1))
			continue;
		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, mem_lvl[i]);
	}

	if (printed) {
		l += scnprintf(out + l, sz - l, " %s", hit_miss);
		return l;
	}

na:
	strcat(out, "N/A");
	return 3;
}

static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Hit",
	"Miss",
	"HitM",
};

static const char * const snoopx_access[] = {
	"Fwd",
	"Peer",
};

int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, snoop_access[i]);
	}

	m = 0;
	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoopx;

	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;

		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, snoopx_access[i]);
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}

int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}

int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}

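/*
 * Decode the data_src of @mi into c2c load/store statistics. Returns
 * -1 when the sample cannot be attributed: missing data address,
 * missing map, or an unparsable data_src.
 */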
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = mem_info__data_src(mi);
	u64 daddr  = mem_info__daddr(mi)->addr;
	u64 op     = data_src->mem_op;
	u64 lvl    = data_src->mem_lvl;
	u64 snoop  = data_src->mem_snoop;
	u64 snoopx = data_src->mem_snoopx;
	u64 lock   = data_src->mem_lock;
	u64 blk    = data_src->mem_blk;
	/*
	 * Skylake might report an unknown remote level via this
	 * bit; consider it when evaluating remote HITMs.
	 *
	 * In case of POWER, the remote field can also denote cache
	 * accesses from another core of the same node. Hence, set
	 * mrem only when the remote field is set and HOPS is zero.
	 */
	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define PEER_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_peer++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA)) stats->blk_data++;
	if (blk & P(BLK, ADDR)) stats->blk_addr++;

	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1)) stats->ld_l1hit++;
			if (lvl & P(LVL, L2)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_l2hit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}
			if (lvl & P(LVL, L3)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT)) {
				stats->rmt_hit++;
			} else if (snoop & P(SNOOP, HITM)) {
				HITM_INC(rmt_hitm);
			} else if (snoopx & P(SNOOPX, PEER)) {
				stats->rmt_hit++;
				PEER_INC(rmt_peer);
			}
		}

		if ((lvl & P(LVL, MISS)))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1)) stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1)) stats->st_l1miss++;
		if (lvl & P(LVL, NA))
			stats->st_na++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mem_info__daddr(mi)->ms.map || !mem_info__iaddr(mi)->ms.map) {
		stats->nomap++;
		return -1;
	}

#undef P
#undef HITM_INC
#undef PEER_INC
	return err;
}

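/* Accumulate the counters from @add into @stats. */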
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries	+= add->nr_entries;

	stats->locks		+= add->locks;
	stats->store		+= add->store;
	stats->st_uncache	+= add->st_uncache;
	stats->st_noadrs	+= add->st_noadrs;
	stats->st_l1hit		+= add->st_l1hit;
	stats->st_l1miss	+= add->st_l1miss;
	stats->st_na		+= add->st_na;
	stats->load		+= add->load;
	stats->ld_excl		+= add->ld_excl;
	stats->ld_shared	+= add->ld_shared;
	stats->ld_uncache	+= add->ld_uncache;
	stats->ld_io		+= add->ld_io;
	stats->ld_miss		+= add->ld_miss;
	stats->ld_noadrs	+= add->ld_noadrs;
	stats->ld_fbhit		+= add->ld_fbhit;
	stats->ld_l1hit		+= add->ld_l1hit;
	stats->ld_l2hit		+= add->ld_l2hit;
	stats->ld_llchit	+= add->ld_llchit;
	stats->lcl_hitm		+= add->lcl_hitm;
	stats->rmt_hitm		+= add->rmt_hitm;
	stats->tot_hitm		+= add->tot_hitm;
	stats->lcl_peer		+= add->lcl_peer;
	stats->rmt_peer		+= add->rmt_peer;
	stats->tot_peer		+= add->tot_peer;
	stats->rmt_hit		+= add->rmt_hit;
	stats->lcl_dram		+= add->lcl_dram;
	stats->rmt_dram		+= add->rmt_dram;
	stats->blk_data		+= add->blk_data;
	stats->blk_addr		+= add->blk_addr;
	stats->nomap		+= add->nomap;
	stats->noparse		+= add->noparse;
}

/*
 * Return an index into the hist_entry->mem_stat array for the given
 * @val, which represents a data_src, based on the @mst type.
 */
int mem_stat_index(const enum mem_stat_type mst, const u64 val)
{
	union perf_mem_data_src src = {
		.val = val,
	};

	switch (mst) {
	case PERF_MEM_STAT_OP:
		switch (src.mem_op) {
		case PERF_MEM_OP_LOAD:
			return MEM_STAT_OP_LOAD;
		case PERF_MEM_OP_STORE:
			return MEM_STAT_OP_STORE;
		case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE:
			return MEM_STAT_OP_LDST;
		default:
			if (src.mem_op & PERF_MEM_OP_PFETCH)
				return MEM_STAT_OP_PFETCH;
			if (src.mem_op & PERF_MEM_OP_EXEC)
				return MEM_STAT_OP_EXEC;
			return MEM_STAT_OP_OTHER;
		}
	case PERF_MEM_STAT_CACHE:
		switch (src.mem_lvl_num) {
		case PERF_MEM_LVLNUM_L1:
			return MEM_STAT_CACHE_L1;
		case PERF_MEM_LVLNUM_L2:
			return MEM_STAT_CACHE_L2;
		case PERF_MEM_LVLNUM_L3:
			return MEM_STAT_CACHE_L3;
		case PERF_MEM_LVLNUM_L4:
			return MEM_STAT_CACHE_L4;
		case PERF_MEM_LVLNUM_LFB:
			return MEM_STAT_CACHE_L1_BUF;
		case PERF_MEM_LVLNUM_L2_MHB:
			return MEM_STAT_CACHE_L2_BUF;
		default:
			return MEM_STAT_CACHE_OTHER;
		}
	case PERF_MEM_STAT_MEMORY:
		switch (src.mem_lvl_num) {
		case PERF_MEM_LVLNUM_MSC:
			return MEM_STAT_MEMORY_MSC;
		case PERF_MEM_LVLNUM_RAM:
			return MEM_STAT_MEMORY_RAM;
		case PERF_MEM_LVLNUM_UNC:
			return MEM_STAT_MEMORY_UNC;
		case PERF_MEM_LVLNUM_CXL:
			return MEM_STAT_MEMORY_CXL;
		case PERF_MEM_LVLNUM_IO:
			return MEM_STAT_MEMORY_IO;
		case PERF_MEM_LVLNUM_PMEM:
			return MEM_STAT_MEMORY_PMEM;
		default:
			return MEM_STAT_MEMORY_OTHER;
		}
	case PERF_MEM_STAT_SNOOP:
		switch (src.mem_snoop) {
		case PERF_MEM_SNOOP_HIT:
			return MEM_STAT_SNOOP_HIT;
		case PERF_MEM_SNOOP_HITM:
			return MEM_STAT_SNOOP_HITM;
		case PERF_MEM_SNOOP_MISS:
			return MEM_STAT_SNOOP_MISS;
		default:
			return MEM_STAT_SNOOP_OTHER;
		}
	case PERF_MEM_STAT_DTLB:
		switch (src.mem_dtlb) {
		case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_L1_HIT;
		case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_L2_HIT;
		case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_ANY_HIT;
		default:
			if (src.mem_dtlb & PERF_MEM_TLB_MISS)
				return MEM_STAT_DTLB_MISS;
			return MEM_STAT_DTLB_OTHER;
		}
	default:
		break;
	}
	return -1;
}

/* To align output, the returned string should be shorter than MEM_STAT_PRINT_LEN */
const char *mem_stat_name(const enum mem_stat_type mst, const int idx)
{
	switch (mst) {
	case PERF_MEM_STAT_OP:
		switch (idx) {
		case MEM_STAT_OP_LOAD:
			return "Load";
		case MEM_STAT_OP_STORE:
			return "Store";
		case MEM_STAT_OP_LDST:
			return "Ld+St";
		case MEM_STAT_OP_PFETCH:
			return "Pfetch";
		case MEM_STAT_OP_EXEC:
			return "Exec";
		case MEM_STAT_OP_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_CACHE:
		switch (idx) {
		case MEM_STAT_CACHE_L1:
			return "L1";
		case MEM_STAT_CACHE_L2:
			return "L2";
		case MEM_STAT_CACHE_L3:
			return "L3";
		case MEM_STAT_CACHE_L4:
			return "L4";
		case MEM_STAT_CACHE_L1_BUF:
			return "L1-buf";
		case MEM_STAT_CACHE_L2_BUF:
			return "L2-buf";
		case MEM_STAT_CACHE_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_MEMORY:
		switch (idx) {
		case MEM_STAT_MEMORY_RAM:
			return "RAM";
		case MEM_STAT_MEMORY_MSC:
			return "MSC";
		case MEM_STAT_MEMORY_UNC:
			return "Uncach";
		case MEM_STAT_MEMORY_CXL:
			return "CXL";
		case MEM_STAT_MEMORY_IO:
			return "IO";
		case MEM_STAT_MEMORY_PMEM:
			return "PMEM";
		case MEM_STAT_MEMORY_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_SNOOP:
		switch (idx) {
		case MEM_STAT_SNOOP_HIT:
			return "Hit";
		case MEM_STAT_SNOOP_HITM:
			return "HitM";
		case MEM_STAT_SNOOP_MISS:
			return "Miss";
		case MEM_STAT_SNOOP_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_DTLB:
		switch (idx) {
		case MEM_STAT_DTLB_L1_HIT:
			return "L1-Hit";
		case MEM_STAT_DTLB_L2_HIT:
			return "L2-Hit";
		case MEM_STAT_DTLB_ANY_HIT:
			return "L?-Hit";
		case MEM_STAT_DTLB_MISS:
			return "Miss";
		case MEM_STAT_DTLB_OTHER:
		default:
			return "Other";
		}
	default:
		break;
	}
	return "N/A";
}
983