// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	u32 pmu_type;

	struct perf_tsc_conversion tc;

	u8 timeless_decoding;
	u8 data_queued;

	u64 sample_type;
	u8 sample_flc;
	u8 sample_llc;
	u8 sample_tlb;
	u8 sample_branch;
	u8 sample_remote_access;
	u8 sample_memory;
	u8 sample_instructions;
	u64 instructions_sample_period;

	u64 l1d_miss_id;
	u64 l1d_access_id;
	u64 llc_miss_id;
	u64 llc_access_id;
	u64 tlb_miss_id;
	u64 tlb_access_id;
	u64 branch_id;
	u64 remote_access_id;
	u64 memory_id;
	u64 instructions_id;

	u64 kernel_start;

	unsigned long num_events;
	u8 use_ctx_pkt_for_pid;

	u64 **metadata;
	u64 metadata_ver;
	u64 metadata_nr_cpu;
	bool is_homogeneous;
};

struct arm_spe_queue {
	struct arm_spe *spe;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	struct auxtrace_buffer *old_buffer;
	union perf_event *event_buf;
	bool on_heap;
	bool done;
	pid_t pid;
	pid_t tid;
	int cpu;
	struct arm_spe_decoder *decoder;
	u64 time;
	u64 timestamp;
	struct thread *thread;
	u64 period_instructions;
	u32 flags;
};
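
/*
 * A rough summary of the decode model implemented below: struct arm_spe
 * holds per-session state, while one struct arm_spe_queue is allocated per
 * auxtrace queue (one queue per CPU, or per thread in per-thread mode).
 * Each queue owns its own arm_spe_decoder and walks its own stream of
 * auxtrace buffers; ordering across queues is handled later through the
 * auxtrace heap, keyed by record timestamp.
 */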

static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;
	speq->period_instructions = 0;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
			return spe->metadata[i];

	return NULL;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}
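
/*
 * The synthesis helpers below all follow the same pattern (sketch only):
 * arm_spe_prep_sample() fills the fields common to every synthetic sample
 * (ip, pid/tid, cpu, cpumode and, for timed decoding, the timestamp
 * converted through spe->tc), the caller then sets the per-event fields
 * (id/stream_id from arm_spe_synth_events(), address, data source, weight,
 * flags), and arm_spe_deliver_synth_event() optionally re-injects the
 * sample before handing it to the session.
 */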

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;
	sample.flags = speq->flags;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/*
	 * Handles perf instruction sampling period.
	 */
	speq->period_instructions++;
	if (speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.period = spe->instructions_sample_period;
	sample.weight = record->latency;
	sample.flags = speq->flags;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

/*
 * CPUs whose SPE data source packets are known to use the common encoding
 * handled by arm_spe__synth_data_source_common().
 */
static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	{},
};

static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;

	speq->flags = 0;
	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
		speq->flags = PERF_IP_FLAG_BRANCH;

		if (record->type & ARM_SPE_BRANCH_MISS)
			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
	}
}

static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three
	 * levels, so for the time being we assume three exist. If a
	 * production system is built with four, then this function would have
	 * to be changed to detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
	 * transfer, so set SNOOPX_PEER
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
{
	struct arm_spe *spe = speq->spe;
	bool is_in_cpu_list;
	u64 *metadata = NULL;
	u64 midr = 0;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(spe->session->evlist->env);
		midr = strtol(cpuid, NULL, 16);
	} else {
		/* CPU ID is -1 for per-thread mode */
		if (speq->cpu < 0) {
			/*
			 * On a heterogeneous system, a CPU ID of -1 means we
			 * cannot confirm that the data source packet is
			 * supported.
			 */
			if (!spe->is_homogeneous)
				return false;

			/* On a homogeneous system, simply use CPU0's metadata */
			if (spe->metadata)
				metadata = spe->metadata[0];
		} else {
			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		}

		if (!metadata)
			return false;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
	if (is_in_cpu_list)
		return true;
	else
		return false;
}

static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
				      const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
	bool is_common = arm_spe__is_common_ds_encoding(speq);

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (is_common)
		arm_spe__synth_data_source_common(record, &data_src);
	else
		arm_spe__synth_memory_level(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}
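
/*
 * arm_spe_sample() below fans one decoded record out into zero or more
 * synthetic samples, depending on which --itrace options were selected:
 * for example, a load that missed in L1D may produce an "l1d-miss" sample,
 * a "memory" sample and an "instructions" sample from the same record.
 * A data_src of zero is used as the "not a memory operation" marker, so
 * only load/store records contribute to the memory event.
 */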

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero, the record is not a memory operation, so
	 * skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is first to decode the packets and then
		 * synthesize a sample based on the record; but here the flow
		 * is reversed: arm_spe_sample() is called to synthesize
		 * samples prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. When setting up a queue in arm_spe__setup_queue(), the
		 * trace data has already been decoded and a record generated,
		 * but the record is left unconsumed until we get here, so it
		 * is correct to synthesize a sample for that leftover record.
		 * 2. After decoding trace data, the record timestamp needs to
		 * be compared with the incoming perf event; if the record
		 * timestamp is later than the perf event, we need to bail out
		 * and push the record onto the auxtrace heap, so that sample
		 * synthesis for it is deferred until the next time we get
		 * here.  This correlates samples between Arm SPE trace data
		 * and other perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding the SPE trace data,
		 * continue with the next trace data to find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the incoming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events and check whether any of them has
	 * the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}
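
/*
 * Timed decoding drains the queues in timestamp order: the queue with the
 * oldest pending record sits at the top of spe->heap, is decoded up to the
 * timestamp of the incoming perf event, and is then pushed back with its
 * new ordinal.  Timeless decoding (no event has PERF_SAMPLE_TIME) skips the
 * heap and simply flushes whole queues in arm_spe_process_timeless_queues().
 */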

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
							      event->fork.tid,
							      sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
{
	u64 *metadata;

	metadata = zalloc(per_cpu_size);
	if (!metadata)
		return NULL;

	memcpy(metadata, buf, per_cpu_size);
	return metadata;
}

static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
{
	int i;

	for (i = 0; i < nr_cpu; i++)
		zfree(&metadata[i]);
	free(metadata);
}

static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
				     u64 *ver, int *nr_cpu)
{
	u64 *ptr = (u64 *)info->priv;
	u64 metadata_size;
	u64 **metadata = NULL;
	int hdr_sz, per_cpu_sz, i;

	metadata_size = info->header.size -
		sizeof(struct perf_record_auxtrace_info);

	/* Metadata version 1 */
	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
		*ver = 1;
		*nr_cpu = 0;
		/* No per CPU metadata */
		return NULL;
	}

	*ver = ptr[ARM_SPE_HEADER_VERSION];
	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];

	metadata = calloc(*nr_cpu, sizeof(*metadata));
	if (!metadata)
		return NULL;

	/* Locate the start address of per CPU metadata */
	ptr += hdr_sz;
	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type         :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps    :%"PRId64"\n",
};

static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version   :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size      :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2      :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number       :%"PRId64"\n",
};

static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic          :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #          :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params  :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR           :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type       :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval   :%"PRId64"\n",
};
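
/*
 * For reference, the v2 metadata layout implied by the indices above
 * (version 1 carries only the PMU type and the per-CPU mmaps flag):
 *
 *   header:  version, header size, PMU type, number of CPUs
 *   per CPU: magic, CPU number, number of parameters, MIDR, PMU type,
 *            minimum sampling interval
 *
 * The per-CPU block is sized by ARM_SPE_CPU_NR_PARAMS, which should allow
 * newer files to append parameters without breaking older parsers (see
 * arm_spe__alloc_metadata() and arm_spe_print_info()).
 */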

static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The number of subsequent parameters is given by
		 * the field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_id = id;
		arm_spe_set_event_name(evlist, id, "branch");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
			goto synth_instructions_out;
		}
		if (spe->synth_opts.period > 1)
			pr_warning("Arm SPE has a hardware-based sample period.\n"
				   "Additional instruction events will be discarded by --itrace\n");

		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = spe->synth_opts.period;
		spe->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}
synth_instructions_out:

	return 0;
}

static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
	u64 midr;
	int i;

	if (!nr_cpu)
		return false;

	for (i = 0; i < nr_cpu; i++) {
		if (!metadata[i])
			return false;

		if (i == 0) {
			midr = metadata[i][ARM_SPE_CPU_MIDR];
			continue;
		}

		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
			return false;
	}

	return true;
}
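
/*
 * Entry point for the PERF_RECORD_AUXTRACE_INFO event.  An illustrative
 * workflow that ends up here (the exact PMU name and --itrace flags vary by
 * system and perf version, so treat this as an example, not a recipe):
 *
 *   perf record -e arm_spe_0// -a -- sleep 1
 *   perf report --itrace=...   # choose which synthetic events to generate
 */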

int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	u64 **metadata = NULL;
	u64 metadata_ver;
	int nr_cpu, err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					 min_sz)
		return -EINVAL;

	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
					   &nr_cpu);
	if (!metadata && metadata_ver != 1) {
		pr_err("Failed to parse Arm SPE metadata.\n");
		return -EINVAL;
	}

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been handled
	 * and the parameters for the hardware clock are stored in the session
	 * context.  Pass these parameters to the struct perf_tsc_conversion in
	 * "spe->tc", which is used later for conversion between the clock
	 * counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}