// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;
	u64				instructions_sample_period;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;

	u64				**metadata;
	u64				metadata_ver;
	u64				metadata_nr_cpu;
	bool				is_homogeneous;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				period_instructions;
};

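/*
 * Hex-dump a raw SPE AUX buffer: one line per packet, showing the packet
 * bytes and, when the packet decodes cleanly, its textual description.
 */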
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;
	speq->period_instructions = 0;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
			return spe->metadata[i];

	return NULL;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

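/*
 * The helpers below synthesize perf samples from the record currently held
 * by the queue's decoder: memory samples carry the data address, physical
 * address, data source and latency; branch samples carry the branch target;
 * instruction samples are emitted once per synthesized sampling period.
 */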
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/*
	 * Handles perf instruction sampling period.
	 */
	speq->period_instructions++;
	if (speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.period = spe->instructions_sample_period;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	{},
};

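/*
 * Map the "common" data source encoding used by the CPUs listed above onto
 * perf_mem_data_src fields: L1D and L2 hits stay local, peer-core and
 * cluster hits are reported as cache-to-cache transfers, the system cache
 * is treated as L3, remote hits as the other socket, and DRAM as local RAM.
 * See the switch statement below for the exact mapping.
 */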
static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three levels
	 * so for the time being we assume three exist. If a production system
	 * is built with four then this function would have to be changed to
	 * detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
	 * transfer, so set SNOOPX_PEER
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
{
	struct arm_spe *spe = speq->spe;
	bool is_in_cpu_list;
	u64 *metadata = NULL;
	u64 midr = 0;

	/*
	 * Metadata version 1 doesn't contain any info for MIDR.
	 * Simply return false in this case.
	 */
	if (spe->metadata_ver == 1) {
		pr_warning_once("The data file contains metadata version 1, "
				"which lacks the data source info. "
				"Please upgrade the tool to record data.\n");
		return false;
	}

	/* CPU ID is -1 for per-thread mode */
	if (speq->cpu < 0) {
		/*
		 * On a heterogeneous system, because the CPU ID is -1 we
		 * cannot confirm that the data source packet is supported.
		 */
		if (!spe->is_homogeneous)
			return false;

		/* In a homogeneous system, simply use CPU0's metadata */
		if (spe->metadata)
			metadata = spe->metadata[0];
	} else {
		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
	}

	if (!metadata)
		return false;

	midr = metadata[ARM_SPE_CPU_MIDR];

	is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
	if (is_in_cpu_list)
		return true;
	else
		return false;
}

static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
				      const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
	bool is_common = arm_spe__is_common_ds_encoding(speq);

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (is_common)
		arm_spe__synth_data_source_common(record, &data_src);
	else
		arm_spe__synth_memory_level(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

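/*
 * Synthesize samples for the record currently held by the decoder, for each
 * event class enabled through the --itrace options: first/last level cache,
 * TLB, branch miss, remote access, generic memory and instruction events.
 */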
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero it means the record is not a memory operation,
	 * so skip synthesizing a memory sample in this case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to first decode the packets and then use
		 * the record to synthesize a sample; but here the flow is
		 * reversed: arm_spe_sample() is called to synthesize a sample
		 * prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. When the queue was set up in arm_spe__setup_queue(), the
		 *    trace data was already decoded and a record generated,
		 *    but no sample was synthesized for it; the sample for
		 *    that pending record is synthesized here.
		 * 2. After decoding trace data, the record timestamp must be
		 *    compared with the timestamp of the coming perf event; if
		 *    the record timestamp is later, bail out and push the
		 *    record onto the auxtrace heap, so synthesizing its
		 *    sample is deferred to the next call of this function.
		 *    This correlates samples between Arm SPE trace data and
		 *    other perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected while decoding the SPE trace data;
		 * continue with the next trace data and find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can
		 * be processed ahead.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events; decoding cannot be timeless if
	 * any of them has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

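/*
 * Queues are processed in timestamp order via the auxtrace min-heap: the
 * queue with the earliest pending record is popped, decoded until its
 * timestamp passes the limit given by the caller, and then pushed back so
 * that samples from all queues interleave correctly with other perf events.
 */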
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

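/*
 * Queue incoming AUX trace data. When reading from a file, the buffer is
 * referenced by its offset in the perf data file and loaded lazily; when
 * reading from a pipe, the data has already been copied out of the pipe,
 * so it can be dumped immediately in dump_trace mode.
 */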
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
{
	u64 *metadata;

	metadata = zalloc(per_cpu_size);
	if (!metadata)
		return NULL;

	memcpy(metadata, buf, per_cpu_size);
	return metadata;
}

static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
{
	int i;

	for (i = 0; i < nr_cpu; i++)
		zfree(&metadata[i]);
	free(metadata);
}

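/*
 * Layout of the version 2+ auxtrace private data, as parsed below (a sketch
 * inferred from the indices used in this file, not a normative definition):
 *
 *   priv[ARM_SPE_HEADER_VERSION]  metadata version
 *   priv[ARM_SPE_HEADER_SIZE]     header size in u64 words
 *   priv[ARM_SPE_PMU_TYPE_V2]     SPE PMU type
 *   priv[ARM_SPE_CPUS_NUM]        number of per-CPU blocks that follow
 *   ... followed by one fixed-size block per CPU, whose size is derived
 *   from the remaining private data divided by the number of CPUs.
 */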
static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
				     u64 *ver, int *nr_cpu)
{
	u64 *ptr = (u64 *)info->priv;
	u64 metadata_size;
	u64 **metadata = NULL;
	int hdr_sz, per_cpu_sz, i;

	metadata_size = info->header.size -
		sizeof(struct perf_record_auxtrace_info);

	/* Metadata version 1 */
	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
		*ver = 1;
		*nr_cpu = 0;
		/* No per CPU metadata */
		return NULL;
	}

	*ver = ptr[ARM_SPE_HEADER_VERSION];
	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];

	metadata = calloc(*nr_cpu, sizeof(*metadata));
	if (!metadata)
		return NULL;

	/* Locate the start address of per CPU metadata */
	ptr += hdr_sz;
	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type         :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps    :%"PRId64"\n",
};

static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version   :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size      :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2      :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number       :%"PRId64"\n",
};

static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic          :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #          :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params  :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR           :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type       :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval   :%"PRId64"\n",
};

static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The sequential parameter size is decided by the
		 * field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
			goto synth_instructions_out;
		}
		if (spe->synth_opts.period > 1)
			pr_warning("Arm SPE has a hardware-based sample period.\n"
				   "Additional instruction events will be discarded by --itrace\n");

		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = spe->synth_opts.period;
		spe->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}
synth_instructions_out:

	return 0;
}

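/*
 * A recorded system is treated as homogeneous when every CPU described in
 * the metadata reports the same MIDR; per-thread traces (CPU ID of -1) can
 * then safely borrow CPU0's metadata when looking up the data source
 * encoding.
 */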
static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
	u64 midr;
	int i;

	if (!nr_cpu)
		return false;

	for (i = 0; i < nr_cpu; i++) {
		if (!metadata[i])
			return false;

		if (i == 0) {
			midr = metadata[i][ARM_SPE_CPU_MIDR];
			continue;
		}

		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
			return false;
	}

	return true;
}

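/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: parse the SPE metadata, set up
 * the struct arm_spe instance and its auxtrace callbacks, synthesize the
 * attribute events for the requested sample types, and queue the recorded
 * AUX data from the file index when it is available.
 */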
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	u64 **metadata = NULL;
	u64 metadata_ver;
	int nr_cpu, err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
					   &nr_cpu);
	if (!metadata && metadata_ver != 1) {
		pr_err("Failed to parse Arm SPE metadata.\n");
		return -EINVAL;
	}

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
	 * and the parameters for the hardware clock are stored in the session
	 * context. Pass these parameters to the struct perf_tsc_conversion
	 * in "spe->tc", which is used later to convert between the clock
	 * counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}