1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Arm Statistical Profiling Extensions (SPE) support 4 * Copyright (c) 2017-2018, Arm Ltd. 5 */ 6 7 #include <byteswap.h> 8 #include <endian.h> 9 #include <errno.h> 10 #include <inttypes.h> 11 #include <linux/bitops.h> 12 #include <linux/kernel.h> 13 #include <linux/log2.h> 14 #include <linux/types.h> 15 #include <linux/zalloc.h> 16 #include <stdlib.h> 17 #include <unistd.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "debug.h" 22 #include "evlist.h" 23 #include "evsel.h" 24 #include "machine.h" 25 #include "session.h" 26 #include "symbol.h" 27 #include "thread.h" 28 #include "thread-stack.h" 29 #include "tsc.h" 30 #include "tool.h" 31 #include "util/synthetic-events.h" 32 33 #include "arm-spe.h" 34 #include "arm-spe-decoder/arm-spe-decoder.h" 35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 36 37 #include "../../arch/arm64/include/asm/cputype.h" 38 #define MAX_TIMESTAMP (~0ULL) 39 40 struct arm_spe { 41 struct auxtrace auxtrace; 42 struct auxtrace_queues queues; 43 struct auxtrace_heap heap; 44 struct itrace_synth_opts synth_opts; 45 u32 auxtrace_type; 46 struct perf_session *session; 47 struct machine *machine; 48 u32 pmu_type; 49 50 struct perf_tsc_conversion tc; 51 52 u8 timeless_decoding; 53 u8 data_queued; 54 55 u64 sample_type; 56 u8 sample_flc; 57 u8 sample_llc; 58 u8 sample_tlb; 59 u8 sample_branch; 60 u8 sample_remote_access; 61 u8 sample_memory; 62 u8 sample_instructions; 63 u64 instructions_sample_period; 64 65 u64 l1d_miss_id; 66 u64 l1d_access_id; 67 u64 llc_miss_id; 68 u64 llc_access_id; 69 u64 tlb_miss_id; 70 u64 tlb_access_id; 71 u64 branch_id; 72 u64 remote_access_id; 73 u64 memory_id; 74 u64 instructions_id; 75 76 u64 kernel_start; 77 78 unsigned long num_events; 79 u8 use_ctx_pkt_for_pid; 80 81 u64 **metadata; 82 u64 metadata_ver; 83 u64 metadata_nr_cpu; 84 bool is_homogeneous; 85 }; 86 87 struct arm_spe_queue { 88 struct arm_spe *spe; 89 unsigned int queue_nr; 90 struct auxtrace_buffer *buffer; 91 struct auxtrace_buffer *old_buffer; 92 union perf_event *event_buf; 93 bool on_heap; 94 bool done; 95 pid_t pid; 96 pid_t tid; 97 int cpu; 98 struct arm_spe_decoder *decoder; 99 u64 time; 100 u64 timestamp; 101 struct thread *thread; 102 u64 period_instructions; 103 u32 flags; 104 }; 105 106 struct data_source_handle { 107 const struct midr_range *midr_ranges; 108 void (*ds_synth)(const struct arm_spe_record *record, 109 union perf_mem_data_src *data_src); 110 }; 111 112 #define DS(range, func) \ 113 { \ 114 .midr_ranges = range, \ 115 .ds_synth = arm_spe__synth_##func, \ 116 } 117 118 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, 119 unsigned char *buf, size_t len) 120 { 121 struct arm_spe_pkt packet; 122 size_t pos = 0; 123 int ret, pkt_len, i; 124 char desc[ARM_SPE_PKT_DESC_MAX]; 125 const char *color = PERF_COLOR_BLUE; 126 127 color_fprintf(stdout, color, 128 ". ... 
ARM SPE data: size %#zx bytes\n", 129 len); 130 131 while (len) { 132 ret = arm_spe_get_packet(buf, len, &packet); 133 if (ret > 0) 134 pkt_len = ret; 135 else 136 pkt_len = 1; 137 printf("."); 138 color_fprintf(stdout, color, " %08zx: ", pos); 139 for (i = 0; i < pkt_len; i++) 140 color_fprintf(stdout, color, " %02x", buf[i]); 141 for (; i < 16; i++) 142 color_fprintf(stdout, color, " "); 143 if (ret > 0) { 144 ret = arm_spe_pkt_desc(&packet, desc, 145 ARM_SPE_PKT_DESC_MAX); 146 if (!ret) 147 color_fprintf(stdout, color, " %s\n", desc); 148 } else { 149 color_fprintf(stdout, color, " Bad packet!\n"); 150 } 151 pos += pkt_len; 152 buf += pkt_len; 153 len -= pkt_len; 154 } 155 } 156 157 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, 158 size_t len) 159 { 160 printf(".\n"); 161 arm_spe_dump(spe, buf, len); 162 } 163 164 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 165 { 166 struct arm_spe_queue *speq = data; 167 struct auxtrace_buffer *buffer = speq->buffer; 168 struct auxtrace_buffer *old_buffer = speq->old_buffer; 169 struct auxtrace_queue *queue; 170 171 queue = &speq->spe->queues.queue_array[speq->queue_nr]; 172 173 buffer = auxtrace_buffer__next(queue, buffer); 174 /* If no more data, drop the previous auxtrace_buffer and return */ 175 if (!buffer) { 176 if (old_buffer) 177 auxtrace_buffer__drop_data(old_buffer); 178 b->len = 0; 179 return 0; 180 } 181 182 speq->buffer = buffer; 183 184 /* If the aux_buffer doesn't have data associated, try to load it */ 185 if (!buffer->data) { 186 /* get the file desc associated with the perf data file */ 187 int fd = perf_data__fd(speq->spe->session->data); 188 189 buffer->data = auxtrace_buffer__get_data(buffer, fd); 190 if (!buffer->data) 191 return -ENOMEM; 192 } 193 194 b->len = buffer->size; 195 b->buf = buffer->data; 196 197 if (b->len) { 198 if (old_buffer) 199 auxtrace_buffer__drop_data(old_buffer); 200 speq->old_buffer = buffer; 201 } else { 202 auxtrace_buffer__drop_data(buffer); 203 return arm_spe_get_trace(b, data); 204 } 205 206 return 0; 207 } 208 209 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, 210 unsigned int queue_nr) 211 { 212 struct arm_spe_params params = { .get_trace = 0, }; 213 struct arm_spe_queue *speq; 214 215 speq = zalloc(sizeof(*speq)); 216 if (!speq) 217 return NULL; 218 219 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 220 if (!speq->event_buf) 221 goto out_free; 222 223 speq->spe = spe; 224 speq->queue_nr = queue_nr; 225 speq->pid = -1; 226 speq->tid = -1; 227 speq->cpu = -1; 228 speq->period_instructions = 0; 229 230 /* params set */ 231 params.get_trace = arm_spe_get_trace; 232 params.data = speq; 233 234 /* create new decoder */ 235 speq->decoder = arm_spe_decoder_new(¶ms); 236 if (!speq->decoder) 237 goto out_free; 238 239 return speq; 240 241 out_free: 242 zfree(&speq->event_buf); 243 free(speq); 244 245 return NULL; 246 } 247 248 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) 249 { 250 return ip >= spe->kernel_start ? 
251 PERF_RECORD_MISC_KERNEL : 252 PERF_RECORD_MISC_USER; 253 } 254 255 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 256 struct auxtrace_queue *queue) 257 { 258 struct arm_spe_queue *speq = queue->priv; 259 pid_t tid; 260 261 tid = machine__get_current_tid(spe->machine, speq->cpu); 262 if (tid != -1) { 263 speq->tid = tid; 264 thread__zput(speq->thread); 265 } else 266 speq->tid = queue->tid; 267 268 if ((!speq->thread) && (speq->tid != -1)) { 269 speq->thread = machine__find_thread(spe->machine, -1, 270 speq->tid); 271 } 272 273 if (speq->thread) { 274 speq->pid = thread__pid(speq->thread); 275 if (queue->cpu == -1) 276 speq->cpu = thread__cpu(speq->thread); 277 } 278 } 279 280 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) 281 { 282 struct arm_spe *spe = speq->spe; 283 int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); 284 285 if (err) 286 return err; 287 288 arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); 289 290 return 0; 291 } 292 293 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu) 294 { 295 u64 i; 296 297 if (!spe->metadata) 298 return NULL; 299 300 for (i = 0; i < spe->metadata_nr_cpu; i++) 301 if (spe->metadata[i][ARM_SPE_CPU] == cpu) 302 return spe->metadata[i]; 303 304 return NULL; 305 } 306 307 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) 308 { 309 struct simd_flags simd_flags = {}; 310 311 if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST)) 312 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; 313 314 if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER)) 315 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; 316 317 if (record->type & ARM_SPE_SVE_PARTIAL_PRED) 318 simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL; 319 320 if (record->type & ARM_SPE_SVE_EMPTY_PRED) 321 simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY; 322 323 return simd_flags; 324 } 325 326 static void arm_spe_prep_sample(struct arm_spe *spe, 327 struct arm_spe_queue *speq, 328 union perf_event *event, 329 struct perf_sample *sample) 330 { 331 struct arm_spe_record *record = &speq->decoder->record; 332 333 if (!spe->timeless_decoding) 334 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); 335 336 sample->ip = record->from_ip; 337 sample->cpumode = arm_spe_cpumode(spe, sample->ip); 338 sample->pid = speq->pid; 339 sample->tid = speq->tid; 340 sample->period = 1; 341 sample->cpu = speq->cpu; 342 sample->simd_flags = arm_spe__synth_simd_flags(record); 343 344 event->sample.header.type = PERF_RECORD_SAMPLE; 345 event->sample.header.misc = sample->cpumode; 346 event->sample.header.size = sizeof(struct perf_event_header); 347 } 348 349 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) 350 { 351 event->header.size = perf_event__sample_event_size(sample, type, 0); 352 return perf_event__synthesize_sample(event, type, 0, sample); 353 } 354 355 static inline int 356 arm_spe_deliver_synth_event(struct arm_spe *spe, 357 struct arm_spe_queue *speq __maybe_unused, 358 union perf_event *event, 359 struct perf_sample *sample) 360 { 361 int ret; 362 363 if (spe->synth_opts.inject) { 364 ret = arm_spe__inject_event(event, sample, spe->sample_type); 365 if (ret) 366 return ret; 367 } 368 369 ret = perf_session__deliver_synth_event(spe->session, event, sample); 370 if (ret) 371 pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 372 373 return ret; 374 } 375 376 static int arm_spe__synth_mem_sample(struct 
arm_spe_queue *speq, 377 u64 spe_events_id, u64 data_src) 378 { 379 struct arm_spe *spe = speq->spe; 380 struct arm_spe_record *record = &speq->decoder->record; 381 union perf_event *event = speq->event_buf; 382 struct perf_sample sample = { .ip = 0, }; 383 384 arm_spe_prep_sample(spe, speq, event, &sample); 385 386 sample.id = spe_events_id; 387 sample.stream_id = spe_events_id; 388 sample.addr = record->virt_addr; 389 sample.phys_addr = record->phys_addr; 390 sample.data_src = data_src; 391 sample.weight = record->latency; 392 393 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 394 } 395 396 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, 397 u64 spe_events_id) 398 { 399 struct arm_spe *spe = speq->spe; 400 struct arm_spe_record *record = &speq->decoder->record; 401 union perf_event *event = speq->event_buf; 402 struct perf_sample sample = { .ip = 0, }; 403 404 arm_spe_prep_sample(spe, speq, event, &sample); 405 406 sample.id = spe_events_id; 407 sample.stream_id = spe_events_id; 408 sample.addr = record->to_ip; 409 sample.weight = record->latency; 410 sample.flags = speq->flags; 411 412 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 413 } 414 415 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, 416 u64 spe_events_id, u64 data_src) 417 { 418 struct arm_spe *spe = speq->spe; 419 struct arm_spe_record *record = &speq->decoder->record; 420 union perf_event *event = speq->event_buf; 421 struct perf_sample sample = { .ip = 0, }; 422 423 /* 424 * Handles perf instruction sampling period. 425 */ 426 speq->period_instructions++; 427 if (speq->period_instructions < spe->instructions_sample_period) 428 return 0; 429 speq->period_instructions = 0; 430 431 arm_spe_prep_sample(spe, speq, event, &sample); 432 433 sample.id = spe_events_id; 434 sample.stream_id = spe_events_id; 435 sample.addr = record->to_ip; 436 sample.phys_addr = record->phys_addr; 437 sample.data_src = data_src; 438 sample.period = spe->instructions_sample_period; 439 sample.weight = record->latency; 440 sample.flags = speq->flags; 441 442 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 443 } 444 445 static const struct midr_range common_ds_encoding_cpus[] = { 446 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), 447 MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), 448 MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), 449 MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), 450 MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), 451 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), 452 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), 453 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), 454 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), 455 {}, 456 }; 457 458 static const struct midr_range ampereone_ds_encoding_cpus[] = { 459 MIDR_ALL_VERSIONS(MIDR_AMPERE1A), 460 {}, 461 }; 462 463 static void arm_spe__sample_flags(struct arm_spe_queue *speq) 464 { 465 const struct arm_spe_record *record = &speq->decoder->record; 466 467 speq->flags = 0; 468 if (record->op & ARM_SPE_OP_BRANCH_ERET) { 469 speq->flags = PERF_IP_FLAG_BRANCH; 470 471 if (record->type & ARM_SPE_BRANCH_MISS) 472 speq->flags |= PERF_IP_FLAG_BRANCH_MISS; 473 } 474 } 475 476 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, 477 union perf_mem_data_src *data_src) 478 { 479 /* 480 * Even though four levels of cache hierarchy are possible, no known 481 * production Neoverse systems currently include more than three levels 482 * so for the time being we assume three exist. 
If a production system
	 * is built with four, then this function would have to be changed to
	 * detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1 or L2, but we do know it was a
	 * cache-to-cache transfer, so set SNOOPX_PEER.
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3.
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except that it came from the
	 * other socket.
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/*
 * Source is IMPDEF. Here we convert the source encoding used on AmpereOne
 * cores to the common (Neoverse, Cortex) encoding to avoid duplicating the
 * decoding code.
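 *
 * The mapping applied below is:
 *   ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_COMMON_DS_PEER_CORE
 *   ARM_SPE_AMPEREONE_SLC                        -> ARM_SPE_COMMON_DS_SYS_CACHE
 *   ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE          -> ARM_SPE_COMMON_DS_REMOTE
 *   ARM_SPE_AMPEREONE_DDR                        -> ARM_SPE_COMMON_DS_DRAM
 *   ARM_SPE_AMPEREONE_L1D                        -> ARM_SPE_COMMON_DS_L1D
 *   ARM_SPE_AMPEREONE_L2D                        -> ARM_SPE_COMMON_DS_L2
 * Unknown encodings trigger a one-time warning and leave data_src untouched.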
 */
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
						 union perf_mem_data_src *data_src)
{
	struct arm_spe_record common_record;

	switch (record->source) {
	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
		break;
	case ARM_SPE_AMPEREONE_SLC:
		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
		break;
	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
		break;
	case ARM_SPE_AMPEREONE_DDR:
		common_record.source = ARM_SPE_COMMON_DS_DRAM;
		break;
	case ARM_SPE_AMPEREONE_L1D:
		common_record.source = ARM_SPE_COMMON_DS_L1D;
		break;
	case ARM_SPE_AMPEREONE_L2D:
		common_record.source = ARM_SPE_COMMON_DS_L2;
		break;
	default:
		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
				record->source);
		return;
	}

	common_record.op = record->op;
	arm_spe__synth_data_source_common(&common_record, data_src);
}

static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
};

static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(spe->session->evlist->env);
		midr = strtol(cpuid, NULL, 16);
	} else {
		/* CPU ID is -1 for per-thread mode */
		if (speq->cpu < 0) {
			/*
			 * On a heterogeneous system the CPU ID is -1, so we
			 * cannot tell whether the data source packet format
			 * is supported.
			 */
			if (!spe->is_homogeneous)
				return false;

			/* In a homogeneous system, simply use CPU0's metadata */
			if (spe->metadata)
				metadata = spe->metadata[0];
		} else {
			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		}

		if (!metadata)
			return false;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
			data_source_handles[i].ds_synth(record, data_src);
			return true;
		}
	}

	return false;
}

static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
				      const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (!arm_spe__synth_ds(speq, record, &data_src))
		arm_spe__synth_memory_level(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero, the record is not a memory operation, so
	 * skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to decode the packets first and then
		 * synthesize a sample from the resulting record; here the
		 * flow is reversed: arm_spe_sample() is called for the
		 * pending record before arm_spe_decode() fetches the next
		 * one.
		 *
		 * There are two reasons for this:
		 * 1. arm_spe__setup_queue() has already decoded trace data
		 * and generated a record, but it defers the sample synthesis
		 * until this point, so the pending record must be turned
		 * into a sample first.
		 * 2. After decoding, the record's timestamp is compared with
		 * the timestamp of the incoming perf event; if the record is
		 * later, we bail out and push the record onto the auxtrace
		 * heap, so its sample is synthesized here on the next pass.
		 * This keeps samples from the Arm SPE trace data and other
		 * perf events correlated in the correct time order.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding the SPE trace data,
		 * continue with the next trace data and look for more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the queue's timestamp is later than the timestamp of the
		 * incoming perf event, bail out so the perf event can be
		 * processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and check whether any of them has
	 * the time bit set; if so, timestamps are recorded and decoding
	 * cannot be timeless.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and the SPE queue.
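		 * (When CONTEXT packets are present in the trace,
		 * use_ctx_pkt_for_pid is set by arm_spe_run_decoder() and the
		 * pid/tid is taken from those packets instead, so this
		 * fallback is skipped.)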
986 */ 987 if (!spe->use_ctx_pkt_for_pid) 988 arm_spe_set_pid_tid_cpu(spe, queue); 989 990 ret = arm_spe_run_decoder(speq, &ts); 991 if (ret < 0) { 992 auxtrace_heap__add(&spe->heap, queue_nr, ts); 993 return ret; 994 } 995 996 if (!ret) { 997 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 998 if (ret < 0) 999 return ret; 1000 } else { 1001 speq->on_heap = false; 1002 } 1003 } 1004 1005 return 0; 1006 } 1007 1008 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 1009 u64 time_) 1010 { 1011 struct auxtrace_queues *queues = &spe->queues; 1012 unsigned int i; 1013 u64 ts = 0; 1014 1015 for (i = 0; i < queues->nr_queues; i++) { 1016 struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 1017 struct arm_spe_queue *speq = queue->priv; 1018 1019 if (speq && (tid == -1 || speq->tid == tid)) { 1020 speq->time = time_; 1021 arm_spe_set_pid_tid_cpu(spe, queue); 1022 arm_spe_run_decoder(speq, &ts); 1023 } 1024 } 1025 return 0; 1026 } 1027 1028 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, 1029 struct perf_sample *sample) 1030 { 1031 pid_t pid, tid; 1032 int cpu; 1033 1034 if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) 1035 return 0; 1036 1037 pid = event->context_switch.next_prev_pid; 1038 tid = event->context_switch.next_prev_tid; 1039 cpu = sample->cpu; 1040 1041 if (tid == -1) 1042 pr_warning("context_switch event has no tid\n"); 1043 1044 return machine__set_current_tid(spe->machine, cpu, pid, tid); 1045 } 1046 1047 static int arm_spe_process_event(struct perf_session *session, 1048 union perf_event *event, 1049 struct perf_sample *sample, 1050 const struct perf_tool *tool) 1051 { 1052 int err = 0; 1053 u64 timestamp; 1054 struct arm_spe *spe = container_of(session->auxtrace, 1055 struct arm_spe, auxtrace); 1056 1057 if (dump_trace) 1058 return 0; 1059 1060 if (!tool->ordered_events) { 1061 pr_err("SPE trace requires ordered events\n"); 1062 return -EINVAL; 1063 } 1064 1065 if (sample->time && (sample->time != (u64) -1)) 1066 timestamp = perf_time_to_tsc(sample->time, &spe->tc); 1067 else 1068 timestamp = 0; 1069 1070 if (timestamp || spe->timeless_decoding) { 1071 err = arm_spe__update_queues(spe); 1072 if (err) 1073 return err; 1074 } 1075 1076 if (spe->timeless_decoding) { 1077 if (event->header.type == PERF_RECORD_EXIT) { 1078 err = arm_spe_process_timeless_queues(spe, 1079 event->fork.tid, 1080 sample->time); 1081 } 1082 } else if (timestamp) { 1083 err = arm_spe_process_queues(spe, timestamp); 1084 if (err) 1085 return err; 1086 1087 if (!spe->use_ctx_pkt_for_pid && 1088 (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || 1089 event->header.type == PERF_RECORD_SWITCH)) 1090 err = arm_spe_context_switch(spe, event, sample); 1091 } 1092 1093 return err; 1094 } 1095 1096 static int arm_spe_process_auxtrace_event(struct perf_session *session, 1097 union perf_event *event, 1098 const struct perf_tool *tool __maybe_unused) 1099 { 1100 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1101 auxtrace); 1102 1103 if (!spe->data_queued) { 1104 struct auxtrace_buffer *buffer; 1105 off_t data_offset; 1106 int fd = perf_data__fd(session->data); 1107 int err; 1108 1109 if (perf_data__is_pipe(session->data)) { 1110 data_offset = 0; 1111 } else { 1112 data_offset = lseek(fd, 0, SEEK_CUR); 1113 if (data_offset == -1) 1114 return -errno; 1115 } 1116 1117 err = auxtrace_queues__add_event(&spe->queues, session, event, 1118 data_offset, &buffer); 1119 if (err) 1120 return err; 1121 1122 /* Dump here now we have 
copied a piped trace out of the pipe */ 1123 if (dump_trace) { 1124 if (auxtrace_buffer__get_data(buffer, fd)) { 1125 arm_spe_dump_event(spe, buffer->data, 1126 buffer->size); 1127 auxtrace_buffer__put_data(buffer); 1128 } 1129 } 1130 } 1131 1132 return 0; 1133 } 1134 1135 static int arm_spe_flush(struct perf_session *session __maybe_unused, 1136 const struct perf_tool *tool __maybe_unused) 1137 { 1138 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1139 auxtrace); 1140 int ret; 1141 1142 if (dump_trace) 1143 return 0; 1144 1145 if (!tool->ordered_events) 1146 return -EINVAL; 1147 1148 ret = arm_spe__update_queues(spe); 1149 if (ret < 0) 1150 return ret; 1151 1152 if (spe->timeless_decoding) 1153 return arm_spe_process_timeless_queues(spe, -1, 1154 MAX_TIMESTAMP - 1); 1155 1156 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 1157 if (ret) 1158 return ret; 1159 1160 if (!spe->use_ctx_pkt_for_pid) 1161 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 1162 "Matching of TIDs to SPE events could be inaccurate.\n"); 1163 1164 return 0; 1165 } 1166 1167 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) 1168 { 1169 u64 *metadata; 1170 1171 metadata = zalloc(per_cpu_size); 1172 if (!metadata) 1173 return NULL; 1174 1175 memcpy(metadata, buf, per_cpu_size); 1176 return metadata; 1177 } 1178 1179 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) 1180 { 1181 int i; 1182 1183 for (i = 0; i < nr_cpu; i++) 1184 zfree(&metadata[i]); 1185 free(metadata); 1186 } 1187 1188 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, 1189 u64 *ver, int *nr_cpu) 1190 { 1191 u64 *ptr = (u64 *)info->priv; 1192 u64 metadata_size; 1193 u64 **metadata = NULL; 1194 int hdr_sz, per_cpu_sz, i; 1195 1196 metadata_size = info->header.size - 1197 sizeof(struct perf_record_auxtrace_info); 1198 1199 /* Metadata version 1 */ 1200 if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { 1201 *ver = 1; 1202 *nr_cpu = 0; 1203 /* No per CPU metadata */ 1204 return NULL; 1205 } 1206 1207 *ver = ptr[ARM_SPE_HEADER_VERSION]; 1208 hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; 1209 *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; 1210 1211 metadata = calloc(*nr_cpu, sizeof(*metadata)); 1212 if (!metadata) 1213 return NULL; 1214 1215 /* Locate the start address of per CPU metadata */ 1216 ptr += hdr_sz; 1217 per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); 1218 1219 for (i = 0; i < *nr_cpu; i++) { 1220 metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); 1221 if (!metadata[i]) 1222 goto err_per_cpu_metadata; 1223 1224 ptr += per_cpu_sz / sizeof(u64); 1225 } 1226 1227 return metadata; 1228 1229 err_per_cpu_metadata: 1230 arm_spe__free_metadata(metadata, *nr_cpu); 1231 return NULL; 1232 } 1233 1234 static void arm_spe_free_queue(void *priv) 1235 { 1236 struct arm_spe_queue *speq = priv; 1237 1238 if (!speq) 1239 return; 1240 thread__zput(speq->thread); 1241 arm_spe_decoder_free(speq->decoder); 1242 zfree(&speq->event_buf); 1243 free(speq); 1244 } 1245 1246 static void arm_spe_free_events(struct perf_session *session) 1247 { 1248 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1249 auxtrace); 1250 struct auxtrace_queues *queues = &spe->queues; 1251 unsigned int i; 1252 1253 for (i = 0; i < queues->nr_queues; i++) { 1254 arm_spe_free_queue(queues->queue_array[i].priv); 1255 queues->queue_array[i].priv = NULL; 1256 } 1257 auxtrace_queues__free(queues); 1258 } 1259 1260 static void arm_spe_free(struct perf_session *session) 1261 
{ 1262 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1263 auxtrace); 1264 1265 auxtrace_heap__free(&spe->heap); 1266 arm_spe_free_events(session); 1267 session->auxtrace = NULL; 1268 arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); 1269 free(spe); 1270 } 1271 1272 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, 1273 struct evsel *evsel) 1274 { 1275 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); 1276 1277 return evsel->core.attr.type == spe->pmu_type; 1278 } 1279 1280 static const char * const metadata_hdr_v1_fmts[] = { 1281 [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1282 [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", 1283 }; 1284 1285 static const char * const metadata_hdr_fmts[] = { 1286 [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", 1287 [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", 1288 [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", 1289 [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", 1290 }; 1291 1292 static const char * const metadata_per_cpu_fmts[] = { 1293 [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", 1294 [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", 1295 [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", 1296 [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", 1297 [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1298 [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", 1299 }; 1300 1301 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) 1302 { 1303 unsigned int i, cpu, hdr_size, cpu_num, cpu_size; 1304 const char * const *hdr_fmts; 1305 1306 if (!dump_trace) 1307 return; 1308 1309 if (spe->metadata_ver == 1) { 1310 cpu_num = 0; 1311 hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; 1312 hdr_fmts = metadata_hdr_v1_fmts; 1313 } else { 1314 cpu_num = arr[ARM_SPE_CPUS_NUM]; 1315 hdr_size = arr[ARM_SPE_HEADER_SIZE]; 1316 hdr_fmts = metadata_hdr_fmts; 1317 } 1318 1319 for (i = 0; i < hdr_size; i++) 1320 fprintf(stdout, hdr_fmts[i], arr[i]); 1321 1322 arr += hdr_size; 1323 for (cpu = 0; cpu < cpu_num; cpu++) { 1324 /* 1325 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS 1326 * are fixed. The sequential parameter size is decided by the 1327 * field 'ARM_SPE_CPU_NR_PARAMS'. 
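		 * For example, with the per-CPU parameters listed in
		 * metadata_per_cpu_fmts (MIDR, PMU type, min interval), each
		 * CPU block is the three fixed fields plus three parameters,
		 * so cpu_size works out to six u64 values.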
1328 */ 1329 cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; 1330 for (i = 0; i < cpu_size; i++) 1331 fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); 1332 arr += cpu_size; 1333 } 1334 } 1335 1336 static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 1337 const char *name) 1338 { 1339 struct evsel *evsel; 1340 1341 evlist__for_each_entry(evlist, evsel) { 1342 if (evsel->core.id && evsel->core.id[0] == id) { 1343 if (evsel->name) 1344 zfree(&evsel->name); 1345 evsel->name = strdup(name); 1346 break; 1347 } 1348 } 1349 } 1350 1351 static int 1352 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) 1353 { 1354 struct evlist *evlist = session->evlist; 1355 struct evsel *evsel; 1356 struct perf_event_attr attr; 1357 bool found = false; 1358 u64 id; 1359 int err; 1360 1361 evlist__for_each_entry(evlist, evsel) { 1362 if (evsel->core.attr.type == spe->pmu_type) { 1363 found = true; 1364 break; 1365 } 1366 } 1367 1368 if (!found) { 1369 pr_debug("No selected events with SPE trace data\n"); 1370 return 0; 1371 } 1372 1373 memset(&attr, 0, sizeof(struct perf_event_attr)); 1374 attr.size = sizeof(struct perf_event_attr); 1375 attr.type = PERF_TYPE_HARDWARE; 1376 attr.sample_type = evsel->core.attr.sample_type & 1377 (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); 1378 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1379 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | 1380 PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; 1381 if (spe->timeless_decoding) 1382 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1383 else 1384 attr.sample_type |= PERF_SAMPLE_TIME; 1385 1386 spe->sample_type = attr.sample_type; 1387 1388 attr.exclude_user = evsel->core.attr.exclude_user; 1389 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1390 attr.exclude_hv = evsel->core.attr.exclude_hv; 1391 attr.exclude_host = evsel->core.attr.exclude_host; 1392 attr.exclude_guest = evsel->core.attr.exclude_guest; 1393 attr.sample_id_all = evsel->core.attr.sample_id_all; 1394 attr.read_format = evsel->core.attr.read_format; 1395 1396 /* create new id val to be a fixed offset from evsel id */ 1397 id = evsel->core.id[0] + 1000000000; 1398 1399 if (!id) 1400 id = 1; 1401 1402 if (spe->synth_opts.flc) { 1403 spe->sample_flc = true; 1404 1405 /* Level 1 data cache miss */ 1406 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1407 if (err) 1408 return err; 1409 spe->l1d_miss_id = id; 1410 arm_spe_set_event_name(evlist, id, "l1d-miss"); 1411 id += 1; 1412 1413 /* Level 1 data cache access */ 1414 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1415 if (err) 1416 return err; 1417 spe->l1d_access_id = id; 1418 arm_spe_set_event_name(evlist, id, "l1d-access"); 1419 id += 1; 1420 } 1421 1422 if (spe->synth_opts.llc) { 1423 spe->sample_llc = true; 1424 1425 /* Last level cache miss */ 1426 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1427 if (err) 1428 return err; 1429 spe->llc_miss_id = id; 1430 arm_spe_set_event_name(evlist, id, "llc-miss"); 1431 id += 1; 1432 1433 /* Last level cache access */ 1434 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1435 if (err) 1436 return err; 1437 spe->llc_access_id = id; 1438 arm_spe_set_event_name(evlist, id, "llc-access"); 1439 id += 1; 1440 } 1441 1442 if (spe->synth_opts.tlb) { 1443 spe->sample_tlb = true; 1444 1445 /* TLB miss */ 1446 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1447 if (err) 1448 return err; 1449 spe->tlb_miss_id = id; 1450 
arm_spe_set_event_name(evlist, id, "tlb-miss"); 1451 id += 1; 1452 1453 /* TLB access */ 1454 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1455 if (err) 1456 return err; 1457 spe->tlb_access_id = id; 1458 arm_spe_set_event_name(evlist, id, "tlb-access"); 1459 id += 1; 1460 } 1461 1462 if (spe->synth_opts.branches) { 1463 spe->sample_branch = true; 1464 1465 /* Branch */ 1466 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1467 if (err) 1468 return err; 1469 spe->branch_id = id; 1470 arm_spe_set_event_name(evlist, id, "branch"); 1471 id += 1; 1472 } 1473 1474 if (spe->synth_opts.remote_access) { 1475 spe->sample_remote_access = true; 1476 1477 /* Remote access */ 1478 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1479 if (err) 1480 return err; 1481 spe->remote_access_id = id; 1482 arm_spe_set_event_name(evlist, id, "remote-access"); 1483 id += 1; 1484 } 1485 1486 if (spe->synth_opts.mem) { 1487 spe->sample_memory = true; 1488 1489 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1490 if (err) 1491 return err; 1492 spe->memory_id = id; 1493 arm_spe_set_event_name(evlist, id, "memory"); 1494 id += 1; 1495 } 1496 1497 if (spe->synth_opts.instructions) { 1498 if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { 1499 pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); 1500 goto synth_instructions_out; 1501 } 1502 if (spe->synth_opts.period > 1) 1503 pr_warning("Arm SPE has a hardware-based sample period.\n" 1504 "Additional instruction events will be discarded by --itrace\n"); 1505 1506 spe->sample_instructions = true; 1507 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1508 attr.sample_period = spe->synth_opts.period; 1509 spe->instructions_sample_period = attr.sample_period; 1510 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1511 if (err) 1512 return err; 1513 spe->instructions_id = id; 1514 arm_spe_set_event_name(evlist, id, "instructions"); 1515 } 1516 synth_instructions_out: 1517 1518 return 0; 1519 } 1520 1521 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) 1522 { 1523 u64 midr; 1524 int i; 1525 1526 if (!nr_cpu) 1527 return false; 1528 1529 for (i = 0; i < nr_cpu; i++) { 1530 if (!metadata[i]) 1531 return false; 1532 1533 if (i == 0) { 1534 midr = metadata[i][ARM_SPE_CPU_MIDR]; 1535 continue; 1536 } 1537 1538 if (midr != metadata[i][ARM_SPE_CPU_MIDR]) 1539 return false; 1540 } 1541 1542 return true; 1543 } 1544 1545 int arm_spe_process_auxtrace_info(union perf_event *event, 1546 struct perf_session *session) 1547 { 1548 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1549 size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; 1550 struct perf_record_time_conv *tc = &session->time_conv; 1551 struct arm_spe *spe; 1552 u64 **metadata = NULL; 1553 u64 metadata_ver; 1554 int nr_cpu, err; 1555 1556 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 1557 min_sz) 1558 return -EINVAL; 1559 1560 metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, 1561 &nr_cpu); 1562 if (!metadata && metadata_ver != 1) { 1563 pr_err("Failed to parse Arm SPE metadata.\n"); 1564 return -EINVAL; 1565 } 1566 1567 spe = zalloc(sizeof(struct arm_spe)); 1568 if (!spe) { 1569 err = -ENOMEM; 1570 goto err_free_metadata; 1571 } 1572 1573 err = auxtrace_queues__init(&spe->queues); 1574 if (err) 1575 goto err_free; 1576 1577 spe->session = session; 1578 spe->machine = &session->machines.host; /* No kvm support */ 
1579 spe->auxtrace_type = auxtrace_info->type; 1580 if (metadata_ver == 1) 1581 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; 1582 else 1583 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; 1584 spe->metadata = metadata; 1585 spe->metadata_ver = metadata_ver; 1586 spe->metadata_nr_cpu = nr_cpu; 1587 spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); 1588 1589 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); 1590 1591 /* 1592 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead 1593 * and the parameters for hardware clock are stored in the session 1594 * context. Passes these parameters to the struct perf_tsc_conversion 1595 * in "spe->tc", which is used for later conversion between clock 1596 * counter and timestamp. 1597 * 1598 * For backward compatibility, copies the fields starting from 1599 * "time_cycles" only if they are contained in the event. 1600 */ 1601 spe->tc.time_shift = tc->time_shift; 1602 spe->tc.time_mult = tc->time_mult; 1603 spe->tc.time_zero = tc->time_zero; 1604 1605 if (event_contains(*tc, time_cycles)) { 1606 spe->tc.time_cycles = tc->time_cycles; 1607 spe->tc.time_mask = tc->time_mask; 1608 spe->tc.cap_user_time_zero = tc->cap_user_time_zero; 1609 spe->tc.cap_user_time_short = tc->cap_user_time_short; 1610 } 1611 1612 spe->auxtrace.process_event = arm_spe_process_event; 1613 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; 1614 spe->auxtrace.flush_events = arm_spe_flush; 1615 spe->auxtrace.free_events = arm_spe_free_events; 1616 spe->auxtrace.free = arm_spe_free; 1617 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; 1618 session->auxtrace = &spe->auxtrace; 1619 1620 arm_spe_print_info(spe, &auxtrace_info->priv[0]); 1621 1622 if (dump_trace) 1623 return 0; 1624 1625 if (session->itrace_synth_opts && session->itrace_synth_opts->set) 1626 spe->synth_opts = *session->itrace_synth_opts; 1627 else 1628 itrace_synth_opts__set_default(&spe->synth_opts, false); 1629 1630 err = arm_spe_synth_events(spe, session); 1631 if (err) 1632 goto err_free_queues; 1633 1634 err = auxtrace_queues__process_index(&spe->queues, session); 1635 if (err) 1636 goto err_free_queues; 1637 1638 if (spe->queues.populated) 1639 spe->data_queued = true; 1640 1641 return 0; 1642 1643 err_free_queues: 1644 auxtrace_queues__free(&spe->queues); 1645 session->auxtrace = NULL; 1646 err_free: 1647 free(spe); 1648 err_free_metadata: 1649 arm_spe__free_metadata(metadata, nr_cpu); 1650 return err; 1651 } 1652