// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <linux/kernel.h>
#include "cpumap.h"
#include "map_symbol.h"
#include "mem-events.h"
#include "mem-info.h"
#include "debug.h"
#include "evsel.h"
#include "symbol.h"
#include "pmu.h"
#include "pmus.h"
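
/*
 * Default load-latency threshold in cycles; substituted into the
 * ldlat=%u parameter of the ldlat-loads event string below.
 */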
unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }

struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"%s/mem-loads,ldlat=%u/P",	"mem-loads",	true,	0),
	E("ldlat-stores",	"%s/mem-stores/P",		"mem-stores",	false,	0),
	E(NULL,			NULL,				NULL,		false,	0),
};
#undef E
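
/*
 * For example, with a PMU named "cpu" and the default threshold of 30,
 * the ldlat-loads entry above expands to "cpu/mem-loads,ldlat=30/P".
 */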

bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };

struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	return &pmu->mem_events[i];
}

static struct perf_pmu *perf_pmus__scan_mem(struct perf_pmu *pmu)
{
	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
		if (pmu->mem_events)
			return pmu;
	}
	return NULL;
}

struct perf_pmu *perf_mem_events_find_pmu(void)
{
	/*
	 * The current perf mem doesn't support per-PMU configuration.
	 * The exact same configuration is applied to all PMUs that
	 * support mem_events, so return the first such PMU.
	 *
	 * Note: the only case with multiple mem_events-capable PMUs is
	 * Intel hybrid, where the exact same mem_events are shared among
	 * the PMUs, so configuring only the first PMU is good enough.
	 */
	return perf_pmus__scan_mem(NULL);
}

/**
 * perf_pmu__mem_events_num_mem_pmus - Get the number of mem PMUs starting from the given pmu
 * @pmu: Start pmu. If it's NULL, search the entire PMU list.
 */
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
{
	int num = 0;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL)
		num++;

	return num;
}
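
/*
 * Format the event string for the i-th mem event of @pmu into @buf,
 * substituting the PMU name (and, for load events, the ldlat
 * threshold). Returns NULL if the event is not applicable to @pmu.
 */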
static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
					     char *buf, size_t buf_size)
{
	struct perf_mem_event *e;

	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	e = &pmu->mem_events[i];
	if (!e->name)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
		if (e->ldlat) {
			if (!e->aux_event) {
				/* ARM and most of Intel */
				scnprintf(buf, buf_size,
					  e->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			} else {
				/* Intel with mem-loads-aux event */
				scnprintf(buf, buf_size,
					  e->name, pmu->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			}
		} else {
			if (!e->aux_event) {
				/* AMD and POWER */
				scnprintf(buf, buf_size,
					  e->name, pmu->name);
			} else {
				return NULL;
			}
		}
		return buf;
	}

	if (i == PERF_MEM_EVENTS__STORE) {
		scnprintf(buf, buf_size,
			  e->name, pmu->name);
		return buf;
	}

	return NULL;
}
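
/*
 * Return true if the group leader's event config matches the PMU's
 * auxiliary load event (e.g. Intel's mem-loads-aux).
 */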
bool is_mem_loads_aux_event(struct evsel *leader)
{
	struct perf_pmu *pmu = leader->pmu;
	struct perf_mem_event *e;

	if (!pmu || !pmu->mem_events)
		return false;

	e = &pmu->mem_events[PERF_MEM_EVENTS__LOAD];
	if (!e->aux_event)
		return false;

	return leader->core.attr.config == e->aux_event;
}
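
/*
 * Parse a comma-separated list of mem event tags (e.g. "ldlat-loads")
 * and mark every matching event for recording in perf_mem_record[].
 */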
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(str) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, str);

	tok = strtok_r(buf, ",", &saveptr);

	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				perf_mem_record[j] = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}

	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}
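
/*
 * An event counts as supported when its sysfs alias exists under
 * <sysfs>/bus/event_source/devices/<pmu>/events/.
 */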
static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu,
					   struct perf_mem_event *e)
{
	char path[PATH_MAX];
	struct stat st;

	if (!e->event_name)
		return true;

	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s",
		  mnt, pmu->name, e->event_name);

	return !stat(path, &st);
}

static int __perf_pmu__mem_events_init(struct perf_pmu *pmu)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" will remain false.
		 */
		if (!e->tag)
			continue;

		e->supported |= perf_pmu__mem_events_supported(mnt, pmu, e);
		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}

int perf_pmu__mem_events_init(void)
{
	struct perf_pmu *pmu = NULL;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		if (__perf_pmu__mem_events_init(pmu))
			return -ENOENT;
	}

	return 0;
}

void perf_pmu__mem_events_list(struct perf_pmu *pmu)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		char buf[128];
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		fprintf(stderr, "%-*s%-*s%s",
			e->tag ? 13 : 0,
			e->tag ? : "",
			e->tag && verbose > 0 ? 25 : 0,
			e->tag && verbose > 0
				? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))
				: "",
			e->supported ? ": available\n" : "");
	}
}
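
/*
 * Append "-e <event>" pairs for every marked mem event of every mem PMU
 * to rec_argv. The formatted event strings are packed back to back into
 * a single allocation returned via @event_name_storage_out, which the
 * caller is expected to free.
 */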
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out)
{
	const char *mnt = sysfs__mount();
	struct perf_pmu *pmu = NULL;
	int i = *argv_nr;
	struct perf_cpu_map *cpu_map = NULL;
	size_t event_name_storage_size =
		perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128;
	size_t event_name_storage_remaining = event_name_storage_size;
	char *event_name_storage = malloc(event_name_storage_size);
	char *event_name_storage_ptr = event_name_storage;

	if (!event_name_storage)
		return -ENOMEM;

	*event_name_storage_out = NULL;
	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			const char *s;
			struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
			int ret;

			if (!perf_mem_record[j])
				continue;

			if (!e->supported) {
				char buf[128];

				pr_err("failed: event '%s' not supported\n",
				       perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)));
				free(event_name_storage);
				return -1;
			}

			s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr,
						      event_name_storage_remaining);
			if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
				continue;

			rec_argv[i++] = "-e";
			rec_argv[i++] = event_name_storage_ptr;
			event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1;
			event_name_storage_ptr += strlen(event_name_storage_ptr) + 1;

			ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
			if (ret < 0) {
				free(event_name_storage);
				return ret;
			}
		}
	}

	if (cpu_map) {
		if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
			char buf[200];

			cpu_map__snprint(cpu_map, buf, sizeof(buf));
			pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
		}
		perf_cpu_map__put(cpu_map);
	}

	*argv_nr = i;
	*event_name_storage_out = event_name_storage;
	return 0;
}

static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};
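
/*
 * Decode the mem_dtlb bits of a data_src into a human-readable string,
 * e.g. "L1 or L2 hit". Missing information is printed as "N/A".
 */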
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB/MAB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};

static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_L1] = "L1",
	[PERF_MEM_LVLNUM_L2] = "L2",
	[PERF_MEM_LVLNUM_L3] = "L3",
	[PERF_MEM_LVLNUM_L4] = "L4",
	[PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB",
	[PERF_MEM_LVLNUM_MSC] = "Memory-side Cache",
	[PERF_MEM_LVLNUM_UNC] = "Uncached",
	[PERF_MEM_LVLNUM_CXL] = "CXL",
	[PERF_MEM_LVLNUM_IO] = "I/O",
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * When printing, 'Remote' is prepended to represent
	 * 'Remote core, same node' accesses, since the remote field
	 * must be set along with the mem_hops field.
	 */
	"core, same node",
	"node, same socket",
	"socket, same board",
	"board",
};
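
/* Decode the memory operation type (load, store, prefetch, ...) of a data_src. */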
static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 op = PERF_MEM_OP_NA;
	int l;

	if (mem_info)
		op = mem_info__const_data_src(mem_info)->mem_op;

	if (op & PERF_MEM_OP_NA)
		l = scnprintf(out, sz, "N/A");
	else if (op & PERF_MEM_OP_LOAD)
		l = scnprintf(out, sz, "LOAD");
	else if (op & PERF_MEM_OP_STORE)
		l = scnprintf(out, sz, "STORE");
	else if (op & PERF_MEM_OP_PFETCH)
		l = scnprintf(out, sz, "PFETCH");
	else if (op & PERF_MEM_OP_EXEC)
		l = scnprintf(out, sz, "EXEC");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	union perf_mem_data_src data_src;
	int printed = 0;
	size_t l = 0;
	size_t i;
	int lvl;
	char hit_miss[5] = {0};

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (!mem_info)
		goto na;

	data_src = *mem_info__const_data_src(mem_info);

	if (data_src.mem_lvl & PERF_MEM_LVL_HIT)
		memcpy(hit_miss, "hit", 3);
	else if (data_src.mem_lvl & PERF_MEM_LVL_MISS)
		memcpy(hit_miss, "miss", 4);

	lvl = data_src.mem_lvl_num;
	if (lvl && lvl != PERF_MEM_LVLNUM_NA) {
		if (data_src.mem_remote) {
			strcat(out, "Remote ");
			l += 7;
		}

		if (data_src.mem_hops)
			l += scnprintf(out + l, sz - l, "%s ", mem_hops[data_src.mem_hops]);

		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "Unknown level %d", lvl);

		l += scnprintf(out + l, sz - l, " %s", hit_miss);
		return l;
	}

	lvl = data_src.mem_lvl;
	if (!lvl)
		goto na;

	lvl &= ~(PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT | PERF_MEM_LVL_MISS);
	if (!lvl)
		goto na;

	for (i = 0; lvl && i < ARRAY_SIZE(mem_lvl); i++, lvl >>= 1) {
		if (!(lvl & 0x1))
			continue;
		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
	}

	if (printed) {
		l += scnprintf(out + l, sz - l, " %s", hit_miss);
		return l;
	}

na:
	strcat(out, "N/A");
	return 3;
}

static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Hit",
	"Miss",
	"HitM",
};

static const char * const snoopx_access[] = {
	"Fwd",
	"Peer",
};

int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
	}

	m = 0;
	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoopx;

	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;

		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoopx_access[i]);
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}

int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}

int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}
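
/*
 * Decode one sample's data_src into the c2c_stats counters. Returns -1
 * when the sample cannot be fully attributed (missing data address,
 * unresolved map or an unparsable data_src), 0 otherwise.
 */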
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = mem_info__data_src(mi);
	u64 daddr = mem_info__daddr(mi)->addr;
	u64 op = data_src->mem_op;
	u64 lvl = data_src->mem_lvl;
	u64 snoop = data_src->mem_snoop;
	u64 snoopx = data_src->mem_snoopx;
	u64 lock = data_src->mem_lock;
	u64 blk = data_src->mem_blk;
	/*
	 * Skylake might report unknown remote level via this
	 * bit, consider it when evaluating remote HITMs.
	 *
	 * On Power, the remote field can also denote cache accesses
	 * from another core of the same node. Hence, set mrem only
	 * when mem_hops is zero and the remote field is set.
	 */
	bool mrem = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define PEER_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_peer++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA)) stats->blk_data++;
	if (blk & P(BLK, ADDR)) stats->blk_addr++;

	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1))  stats->ld_l1hit++;
			if (lvl & P(LVL, L2)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_l2hit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}
			if (lvl & P(LVL, L3)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT)) {
				stats->rmt_hit++;
			} else if (snoop & P(SNOOP, HITM)) {
				HITM_INC(rmt_hitm);
			} else if (snoopx & P(SNOOPX, PEER)) {
				stats->rmt_hit++;
				PEER_INC(rmt_peer);
			}
		}

		if (lvl & P(LVL, MISS))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1))  stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1)) stats->st_l1miss++;
		if (lvl & P(LVL, NA))
			stats->st_na++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mem_info__daddr(mi)->ms.map || !mem_info__iaddr(mi)->ms.map) {
		stats->nomap++;
		return -1;
	}

#undef P
#undef HITM_INC
#undef PEER_INC
	return err;
}
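
/* Accumulate all counters from 'add' into 'stats'. */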
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries += add->nr_entries;

	stats->locks += add->locks;
	stats->store += add->store;
	stats->st_uncache += add->st_uncache;
	stats->st_noadrs += add->st_noadrs;
	stats->st_l1hit += add->st_l1hit;
	stats->st_l1miss += add->st_l1miss;
	stats->st_na += add->st_na;
	stats->load += add->load;
	stats->ld_excl += add->ld_excl;
	stats->ld_shared += add->ld_shared;
	stats->ld_uncache += add->ld_uncache;
	stats->ld_io += add->ld_io;
	stats->ld_miss += add->ld_miss;
	stats->ld_noadrs += add->ld_noadrs;
	stats->ld_fbhit += add->ld_fbhit;
	stats->ld_l1hit += add->ld_l1hit;
	stats->ld_l2hit += add->ld_l2hit;
	stats->ld_llchit += add->ld_llchit;
	stats->lcl_hitm += add->lcl_hitm;
	stats->rmt_hitm += add->rmt_hitm;
	stats->tot_hitm += add->tot_hitm;
	stats->lcl_peer += add->lcl_peer;
	stats->rmt_peer += add->rmt_peer;
	stats->tot_peer += add->tot_peer;
	stats->rmt_hit += add->rmt_hit;
	stats->lcl_dram += add->lcl_dram;
	stats->rmt_dram += add->rmt_dram;
	stats->blk_data += add->blk_data;
	stats->blk_addr += add->blk_addr;
	stats->nomap += add->nomap;
	stats->noparse += add->noparse;
}

/*
 * Returns an index into the hist_entry->mem_stat array for the given
 * val, which encodes a data_src, based on the mem_stat_type.
 */
int mem_stat_index(const enum mem_stat_type mst, const u64 val)
{
	union perf_mem_data_src src = {
		.val = val,
	};

	switch (mst) {
	case PERF_MEM_STAT_OP:
		switch (src.mem_op) {
		case PERF_MEM_OP_LOAD:
			return MEM_STAT_OP_LOAD;
		case PERF_MEM_OP_STORE:
			return MEM_STAT_OP_STORE;
		case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE:
			return MEM_STAT_OP_LDST;
		default:
			if (src.mem_op & PERF_MEM_OP_PFETCH)
				return MEM_STAT_OP_PFETCH;
			if (src.mem_op & PERF_MEM_OP_EXEC)
				return MEM_STAT_OP_EXEC;
			return MEM_STAT_OP_OTHER;
		}
	case PERF_MEM_STAT_CACHE:
		switch (src.mem_lvl_num) {
		case PERF_MEM_LVLNUM_L1:
			return MEM_STAT_CACHE_L1;
		case PERF_MEM_LVLNUM_L2:
			return MEM_STAT_CACHE_L2;
		case PERF_MEM_LVLNUM_L3:
			return MEM_STAT_CACHE_L3;
		case PERF_MEM_LVLNUM_L4:
			return MEM_STAT_CACHE_L4;
		case PERF_MEM_LVLNUM_LFB:
			return MEM_STAT_CACHE_L1_BUF;
		case PERF_MEM_LVLNUM_L2_MHB:
			return MEM_STAT_CACHE_L2_BUF;
		default:
			return MEM_STAT_CACHE_OTHER;
		}
	case PERF_MEM_STAT_MEMORY:
		switch (src.mem_lvl_num) {
		case PERF_MEM_LVLNUM_MSC:
			return MEM_STAT_MEMORY_MSC;
		case PERF_MEM_LVLNUM_RAM:
			return MEM_STAT_MEMORY_RAM;
		case PERF_MEM_LVLNUM_UNC:
			return MEM_STAT_MEMORY_UNC;
		case PERF_MEM_LVLNUM_CXL:
			return MEM_STAT_MEMORY_CXL;
		case PERF_MEM_LVLNUM_IO:
			return MEM_STAT_MEMORY_IO;
		case PERF_MEM_LVLNUM_PMEM:
			return MEM_STAT_MEMORY_PMEM;
		default:
			return MEM_STAT_MEMORY_OTHER;
		}
	case PERF_MEM_STAT_SNOOP:
		switch (src.mem_snoop) {
		case PERF_MEM_SNOOP_HIT:
			return MEM_STAT_SNOOP_HIT;
		case PERF_MEM_SNOOP_HITM:
			return MEM_STAT_SNOOP_HITM;
		case PERF_MEM_SNOOP_MISS:
			return MEM_STAT_SNOOP_MISS;
		default:
			return MEM_STAT_SNOOP_OTHER;
		}
	case PERF_MEM_STAT_DTLB:
		switch (src.mem_dtlb) {
		case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_L1_HIT;
		case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_L2_HIT;
		case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
			return MEM_STAT_DTLB_ANY_HIT;
		default:
			if (src.mem_dtlb & PERF_MEM_TLB_MISS)
				return MEM_STAT_DTLB_MISS;
			return MEM_STAT_DTLB_OTHER;
		}
	default:
		break;
	}
	return -1;
}

/* To align output, the returned string should be shorter than MEM_STAT_PRINT_LEN */
const char *mem_stat_name(const enum mem_stat_type mst, const int idx)
{
	switch (mst) {
	case PERF_MEM_STAT_OP:
		switch (idx) {
		case MEM_STAT_OP_LOAD:
			return "Load";
		case MEM_STAT_OP_STORE:
			return "Store";
		case MEM_STAT_OP_LDST:
			return "Ld+St";
		case MEM_STAT_OP_PFETCH:
			return "Pfetch";
		case MEM_STAT_OP_EXEC:
			return "Exec";
		case MEM_STAT_OP_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_CACHE:
		switch (idx) {
		case MEM_STAT_CACHE_L1:
			return "L1";
		case MEM_STAT_CACHE_L2:
			return "L2";
		case MEM_STAT_CACHE_L3:
			return "L3";
		case MEM_STAT_CACHE_L4:
			return "L4";
		case MEM_STAT_CACHE_L1_BUF:
			return "L1-buf";
		case MEM_STAT_CACHE_L2_BUF:
			return "L2-buf";
		case MEM_STAT_CACHE_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_MEMORY:
		switch (idx) {
		case MEM_STAT_MEMORY_RAM:
			return "RAM";
		case MEM_STAT_MEMORY_MSC:
			return "MSC";
		case MEM_STAT_MEMORY_UNC:
			return "Uncach";
		case MEM_STAT_MEMORY_CXL:
			return "CXL";
		case MEM_STAT_MEMORY_IO:
			return "IO";
		case MEM_STAT_MEMORY_PMEM:
			return "PMEM";
		case MEM_STAT_MEMORY_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_SNOOP:
		switch (idx) {
		case MEM_STAT_SNOOP_HIT:
			return "Hit";
		case MEM_STAT_SNOOP_HITM:
			return "HitM";
		case MEM_STAT_SNOOP_MISS:
			return "Miss";
		case MEM_STAT_SNOOP_OTHER:
		default:
			return "Other";
		}
	case PERF_MEM_STAT_DTLB:
		switch (idx) {
		case MEM_STAT_DTLB_L1_HIT:
			return "L1-Hit";
		case MEM_STAT_DTLB_L2_HIT:
			return "L2-Hit";
		case MEM_STAT_DTLB_ANY_HIT:
			return "L?-Hit";
		case MEM_STAT_DTLB_MISS:
			return "Miss";
		case MEM_STAT_DTLB_OTHER:
		default:
			return "Other";
		}
	default:
		break;
	}
	return "N/A";
}