1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Arm Statistical Profiling Extensions (SPE) support 4 * Copyright (c) 2017-2018, Arm Ltd. 5 */ 6 7 #include <byteswap.h> 8 #include <endian.h> 9 #include <errno.h> 10 #include <inttypes.h> 11 #include <linux/bitops.h> 12 #include <linux/kernel.h> 13 #include <linux/log2.h> 14 #include <linux/types.h> 15 #include <linux/zalloc.h> 16 #include <stdlib.h> 17 #include <unistd.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "debug.h" 22 #include "evlist.h" 23 #include "evsel.h" 24 #include "machine.h" 25 #include "session.h" 26 #include "symbol.h" 27 #include "thread.h" 28 #include "thread-stack.h" 29 #include "tsc.h" 30 #include "tool.h" 31 #include "util/synthetic-events.h" 32 33 #include "arm-spe.h" 34 #include "arm-spe-decoder/arm-spe-decoder.h" 35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 36 37 #include "../../arch/arm64/include/asm/cputype.h" 38 #define MAX_TIMESTAMP (~0ULL) 39 40 #define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) 41 42 #define ARM_SPE_CACHE_EVENT(lvl) \ 43 (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS) 44 45 #define arm_spe_is_cache_level(type, lvl) \ 46 ((type) & ARM_SPE_CACHE_EVENT(lvl)) 47 48 #define arm_spe_is_cache_hit(type, lvl) \ 49 (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS) 50 51 #define arm_spe_is_cache_miss(type, lvl) \ 52 ((type) & ARM_SPE_##lvl##_MISS) 53 54 struct arm_spe { 55 struct auxtrace auxtrace; 56 struct auxtrace_queues queues; 57 struct auxtrace_heap heap; 58 struct itrace_synth_opts synth_opts; 59 u32 auxtrace_type; 60 struct perf_session *session; 61 struct machine *machine; 62 u32 pmu_type; 63 64 struct perf_tsc_conversion tc; 65 66 u8 timeless_decoding; 67 u8 data_queued; 68 69 u64 sample_type; 70 u8 sample_flc; 71 u8 sample_llc; 72 u8 sample_tlb; 73 u8 sample_branch; 74 u8 sample_remote_access; 75 u8 sample_memory; 76 u8 sample_instructions; 77 78 u64 l1d_miss_id; 79 u64 l1d_access_id; 80 u64 llc_miss_id; 81 u64 llc_access_id; 82 u64 tlb_miss_id; 83 u64 tlb_access_id; 84 u64 branch_id; 85 u64 remote_access_id; 86 u64 memory_id; 87 u64 instructions_id; 88 89 u64 kernel_start; 90 91 unsigned long num_events; 92 u8 use_ctx_pkt_for_pid; 93 94 u64 **metadata; 95 u64 metadata_ver; 96 u64 metadata_nr_cpu; 97 bool is_homogeneous; 98 }; 99 100 struct arm_spe_queue { 101 struct arm_spe *spe; 102 unsigned int queue_nr; 103 struct auxtrace_buffer *buffer; 104 struct auxtrace_buffer *old_buffer; 105 union perf_event *event_buf; 106 bool on_heap; 107 bool done; 108 pid_t pid; 109 pid_t tid; 110 int cpu; 111 struct arm_spe_decoder *decoder; 112 u64 time; 113 u64 timestamp; 114 struct thread *thread; 115 u64 sample_count; 116 u32 flags; 117 struct branch_stack *last_branch; 118 }; 119 120 struct data_source_handle { 121 const struct midr_range *midr_ranges; 122 void (*ds_synth)(const struct arm_spe_record *record, 123 union perf_mem_data_src *data_src); 124 }; 125 126 #define DS(range, func) \ 127 { \ 128 .midr_ranges = range, \ 129 .ds_synth = arm_spe__synth_##func, \ 130 } 131 132 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, 133 unsigned char *buf, size_t len) 134 { 135 struct arm_spe_pkt packet; 136 size_t pos = 0; 137 int ret, pkt_len, i; 138 char desc[ARM_SPE_PKT_DESC_MAX]; 139 const char *color = PERF_COLOR_BLUE; 140 141 color_fprintf(stdout, color, 142 ". ... 
ARM SPE data: size %#zx bytes\n", 143 len); 144 145 while (len) { 146 ret = arm_spe_get_packet(buf, len, &packet); 147 if (ret > 0) 148 pkt_len = ret; 149 else 150 pkt_len = 1; 151 printf("."); 152 color_fprintf(stdout, color, " %08zx: ", pos); 153 for (i = 0; i < pkt_len; i++) 154 color_fprintf(stdout, color, " %02x", buf[i]); 155 for (; i < 16; i++) 156 color_fprintf(stdout, color, " "); 157 if (ret > 0) { 158 ret = arm_spe_pkt_desc(&packet, desc, 159 ARM_SPE_PKT_DESC_MAX); 160 if (!ret) 161 color_fprintf(stdout, color, " %s\n", desc); 162 } else { 163 color_fprintf(stdout, color, " Bad packet!\n"); 164 } 165 pos += pkt_len; 166 buf += pkt_len; 167 len -= pkt_len; 168 } 169 } 170 171 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, 172 size_t len) 173 { 174 printf(".\n"); 175 arm_spe_dump(spe, buf, len); 176 } 177 178 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 179 { 180 struct arm_spe_queue *speq = data; 181 struct auxtrace_buffer *buffer = speq->buffer; 182 struct auxtrace_buffer *old_buffer = speq->old_buffer; 183 struct auxtrace_queue *queue; 184 185 queue = &speq->spe->queues.queue_array[speq->queue_nr]; 186 187 buffer = auxtrace_buffer__next(queue, buffer); 188 /* If no more data, drop the previous auxtrace_buffer and return */ 189 if (!buffer) { 190 if (old_buffer) 191 auxtrace_buffer__drop_data(old_buffer); 192 b->len = 0; 193 return 0; 194 } 195 196 speq->buffer = buffer; 197 198 /* If the aux_buffer doesn't have data associated, try to load it */ 199 if (!buffer->data) { 200 /* get the file desc associated with the perf data file */ 201 int fd = perf_data__fd(speq->spe->session->data); 202 203 buffer->data = auxtrace_buffer__get_data(buffer, fd); 204 if (!buffer->data) 205 return -ENOMEM; 206 } 207 208 b->len = buffer->size; 209 b->buf = buffer->data; 210 211 if (b->len) { 212 if (old_buffer) 213 auxtrace_buffer__drop_data(old_buffer); 214 speq->old_buffer = buffer; 215 } else { 216 auxtrace_buffer__drop_data(buffer); 217 return arm_spe_get_trace(b, data); 218 } 219 220 return 0; 221 } 222 223 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, 224 unsigned int queue_nr) 225 { 226 struct arm_spe_params params = { .get_trace = 0, }; 227 struct arm_spe_queue *speq; 228 229 speq = zalloc(sizeof(*speq)); 230 if (!speq) 231 return NULL; 232 233 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 234 if (!speq->event_buf) 235 goto out_free; 236 237 speq->spe = spe; 238 speq->queue_nr = queue_nr; 239 speq->pid = -1; 240 speq->tid = -1; 241 speq->cpu = -1; 242 243 /* params set */ 244 params.get_trace = arm_spe_get_trace; 245 params.data = speq; 246 247 if (spe->synth_opts.last_branch) { 248 size_t sz = sizeof(struct branch_stack); 249 250 /* Allocate up to two entries for PBT + TGT */ 251 sz += sizeof(struct branch_entry) * 252 min(spe->synth_opts.last_branch_sz, 2U); 253 speq->last_branch = zalloc(sz); 254 if (!speq->last_branch) 255 goto out_free; 256 } 257 258 /* create new decoder */ 259 speq->decoder = arm_spe_decoder_new(¶ms); 260 if (!speq->decoder) 261 goto out_free; 262 263 return speq; 264 265 out_free: 266 zfree(&speq->event_buf); 267 zfree(&speq->last_branch); 268 free(speq); 269 270 return NULL; 271 } 272 273 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) 274 { 275 return ip >= spe->kernel_start ? 
276 PERF_RECORD_MISC_KERNEL : 277 PERF_RECORD_MISC_USER; 278 } 279 280 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 281 struct auxtrace_queue *queue) 282 { 283 struct arm_spe_queue *speq = queue->priv; 284 pid_t tid; 285 286 tid = machine__get_current_tid(spe->machine, speq->cpu); 287 if (tid != -1) { 288 speq->tid = tid; 289 thread__zput(speq->thread); 290 } else 291 speq->tid = queue->tid; 292 293 if ((!speq->thread) && (speq->tid != -1)) { 294 speq->thread = machine__find_thread(spe->machine, -1, 295 speq->tid); 296 } 297 298 if (speq->thread) { 299 speq->pid = thread__pid(speq->thread); 300 if (queue->cpu == -1) 301 speq->cpu = thread__cpu(speq->thread); 302 } 303 } 304 305 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) 306 { 307 struct arm_spe *spe = speq->spe; 308 int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); 309 310 if (err) 311 return err; 312 313 arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); 314 315 return 0; 316 } 317 318 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu) 319 { 320 u64 i; 321 322 if (!spe->metadata) 323 return NULL; 324 325 /* CPU ID is -1 for per-thread mode */ 326 if (cpu < 0) { 327 /* 328 * On the heterogeneous system, due to CPU ID is -1, 329 * cannot confirm the data source packet is supported. 330 */ 331 if (!spe->is_homogeneous) 332 return NULL; 333 334 /* In homogeneous system, simply use CPU0's metadata */ 335 return spe->metadata[0]; 336 } 337 338 for (i = 0; i < spe->metadata_nr_cpu; i++) 339 if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu) 340 return spe->metadata[i]; 341 342 return NULL; 343 } 344 345 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) 346 { 347 struct simd_flags simd_flags = {}; 348 349 if (record->op & ARM_SPE_OP_SVE) 350 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; 351 352 if (record->type & ARM_SPE_SVE_PARTIAL_PRED) 353 simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL; 354 355 if (record->type & ARM_SPE_SVE_EMPTY_PRED) 356 simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY; 357 358 return simd_flags; 359 } 360 361 static void arm_spe_prep_sample(struct arm_spe *spe, 362 struct arm_spe_queue *speq, 363 union perf_event *event, 364 struct perf_sample *sample) 365 { 366 struct arm_spe_record *record = &speq->decoder->record; 367 368 if (!spe->timeless_decoding) 369 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); 370 371 sample->ip = record->from_ip; 372 sample->cpumode = arm_spe_cpumode(spe, sample->ip); 373 sample->pid = speq->pid; 374 sample->tid = speq->tid; 375 sample->period = spe->synth_opts.period; 376 sample->cpu = speq->cpu; 377 sample->simd_flags = arm_spe__synth_simd_flags(record); 378 379 event->sample.header.type = PERF_RECORD_SAMPLE; 380 event->sample.header.misc = sample->cpumode; 381 event->sample.header.size = sizeof(struct perf_event_header); 382 } 383 384 static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq) 385 { 386 struct arm_spe *spe = speq->spe; 387 struct arm_spe_record *record = &speq->decoder->record; 388 struct branch_stack *bstack = speq->last_branch; 389 struct branch_flags *bs_flags; 390 unsigned int last_branch_sz = spe->synth_opts.last_branch_sz; 391 bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH); 392 bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt; 393 size_t sz = sizeof(struct branch_stack) + 394 sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */; 395 int i = 0; 396 397 /* Clean up branch stack */ 398 
memset(bstack, 0x0, sz); 399 400 if (!have_tgt && !have_pbt) 401 return; 402 403 if (have_tgt) { 404 bstack->entries[i].from = record->from_ip; 405 bstack->entries[i].to = record->to_ip; 406 407 bs_flags = &bstack->entries[i].flags; 408 bs_flags->value = 0; 409 410 if (record->op & ARM_SPE_OP_BR_CR_BL) { 411 if (record->op & ARM_SPE_OP_BR_COND) 412 bs_flags->type |= PERF_BR_COND_CALL; 413 else 414 bs_flags->type |= PERF_BR_CALL; 415 /* 416 * Indirect branch instruction without link (e.g. BR), 417 * take this case as function return. 418 */ 419 } else if (record->op & ARM_SPE_OP_BR_CR_RET || 420 record->op & ARM_SPE_OP_BR_INDIRECT) { 421 if (record->op & ARM_SPE_OP_BR_COND) 422 bs_flags->type |= PERF_BR_COND_RET; 423 else 424 bs_flags->type |= PERF_BR_RET; 425 } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) { 426 if (record->op & ARM_SPE_OP_BR_COND) 427 bs_flags->type |= PERF_BR_COND; 428 else 429 bs_flags->type |= PERF_BR_UNCOND; 430 } else { 431 if (record->op & ARM_SPE_OP_BR_COND) 432 bs_flags->type |= PERF_BR_COND; 433 else 434 bs_flags->type |= PERF_BR_UNKNOWN; 435 } 436 437 if (record->type & ARM_SPE_BRANCH_MISS) { 438 bs_flags->mispred = 1; 439 bs_flags->predicted = 0; 440 } else { 441 bs_flags->mispred = 0; 442 bs_flags->predicted = 1; 443 } 444 445 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) 446 bs_flags->not_taken = 1; 447 448 if (record->type & ARM_SPE_IN_TXN) 449 bs_flags->in_tx = 1; 450 451 bs_flags->cycles = min(record->latency, 0xFFFFU); 452 i++; 453 } 454 455 if (have_pbt) { 456 bs_flags = &bstack->entries[i].flags; 457 bs_flags->type |= PERF_BR_UNKNOWN; 458 bstack->entries[i].to = record->prev_br_tgt; 459 i++; 460 } 461 462 bstack->nr = i; 463 bstack->hw_idx = -1ULL; 464 } 465 466 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) 467 { 468 event->header.size = perf_event__sample_event_size(sample, type, 0); 469 return perf_event__synthesize_sample(event, type, 0, sample); 470 } 471 472 static inline int 473 arm_spe_deliver_synth_event(struct arm_spe *spe, 474 struct arm_spe_queue *speq __maybe_unused, 475 union perf_event *event, 476 struct perf_sample *sample) 477 { 478 int ret; 479 480 if (spe->synth_opts.inject) { 481 ret = arm_spe__inject_event(event, sample, spe->sample_type); 482 if (ret) 483 return ret; 484 } 485 486 ret = perf_session__deliver_synth_event(spe->session, event, sample); 487 if (ret) 488 pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 489 490 return ret; 491 } 492 493 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, 494 u64 spe_events_id, 495 union perf_mem_data_src data_src) 496 { 497 struct arm_spe *spe = speq->spe; 498 struct arm_spe_record *record = &speq->decoder->record; 499 union perf_event *event = speq->event_buf; 500 struct perf_sample sample; 501 int ret; 502 503 perf_sample__init(&sample, /*all=*/true); 504 arm_spe_prep_sample(spe, speq, event, &sample); 505 506 sample.id = spe_events_id; 507 sample.stream_id = spe_events_id; 508 sample.addr = record->virt_addr; 509 sample.phys_addr = record->phys_addr; 510 sample.data_src = data_src.val; 511 sample.weight = record->latency; 512 513 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 514 perf_sample__exit(&sample); 515 return ret; 516 } 517 518 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, 519 u64 spe_events_id) 520 { 521 struct arm_spe *spe = speq->spe; 522 struct arm_spe_record *record = &speq->decoder->record; 523 union perf_event *event = speq->event_buf; 524 struct 
perf_sample sample; 525 int ret; 526 527 perf_sample__init(&sample, /*all=*/true); 528 arm_spe_prep_sample(spe, speq, event, &sample); 529 530 sample.id = spe_events_id; 531 sample.stream_id = spe_events_id; 532 sample.addr = record->to_ip; 533 sample.weight = record->latency; 534 sample.flags = speq->flags; 535 sample.branch_stack = speq->last_branch; 536 537 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 538 perf_sample__exit(&sample); 539 return ret; 540 } 541 542 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, 543 u64 spe_events_id, 544 union perf_mem_data_src data_src) 545 { 546 struct arm_spe *spe = speq->spe; 547 struct arm_spe_record *record = &speq->decoder->record; 548 union perf_event *event = speq->event_buf; 549 struct perf_sample sample; 550 int ret; 551 552 perf_sample__init(&sample, /*all=*/true); 553 arm_spe_prep_sample(spe, speq, event, &sample); 554 555 sample.id = spe_events_id; 556 sample.stream_id = spe_events_id; 557 sample.addr = record->to_ip; 558 sample.phys_addr = record->phys_addr; 559 sample.data_src = data_src.val; 560 sample.weight = record->latency; 561 sample.flags = speq->flags; 562 sample.branch_stack = speq->last_branch; 563 564 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 565 perf_sample__exit(&sample); 566 return ret; 567 } 568 569 static const struct midr_range common_ds_encoding_cpus[] = { 570 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), 571 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), 572 MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), 573 MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), 574 MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), 575 MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), 576 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), 577 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), 578 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), 579 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), 580 {}, 581 }; 582 583 static const struct midr_range ampereone_ds_encoding_cpus[] = { 584 MIDR_ALL_VERSIONS(MIDR_AMPERE1A), 585 {}, 586 }; 587 588 static const struct midr_range hisi_hip_ds_encoding_cpus[] = { 589 MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), 590 {}, 591 }; 592 593 static void arm_spe__sample_flags(struct arm_spe_queue *speq) 594 { 595 const struct arm_spe_record *record = &speq->decoder->record; 596 597 speq->flags = 0; 598 if (record->op & ARM_SPE_OP_BRANCH_ERET) { 599 speq->flags = PERF_IP_FLAG_BRANCH; 600 601 if (record->type & ARM_SPE_BRANCH_MISS) 602 speq->flags |= PERF_IP_FLAG_BRANCH_MISS; 603 604 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) 605 speq->flags |= PERF_IP_FLAG_NOT_TAKEN; 606 607 if (record->type & ARM_SPE_IN_TXN) 608 speq->flags |= PERF_IP_FLAG_IN_TX; 609 610 if (record->op & ARM_SPE_OP_BR_COND) 611 speq->flags |= PERF_IP_FLAG_CONDITIONAL; 612 613 if (record->op & ARM_SPE_OP_BR_CR_BL) 614 speq->flags |= PERF_IP_FLAG_CALL; 615 else if (record->op & ARM_SPE_OP_BR_CR_RET) 616 speq->flags |= PERF_IP_FLAG_RETURN; 617 /* 618 * Indirect branch instruction without link (e.g. BR), 619 * take it as a function return. 620 */ 621 else if (record->op & ARM_SPE_OP_BR_INDIRECT) 622 speq->flags |= PERF_IP_FLAG_RETURN; 623 } 624 } 625 626 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, 627 union perf_mem_data_src *data_src) 628 { 629 /* 630 * Even though four levels of cache hierarchy are possible, no known 631 * production Neoverse systems currently include more than three levels 632 * so for the time being we assume three exist. 
	 * If a production system is built with four, then this function would
	 * have to be changed to detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know whether this is L1 or L2, but we do know it was a
	 * cache-to-cache transfer, so set SNOOPX_PEER.
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3.
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except that it came from the
	 * other socket.
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/*
 * Source is IMPDEF. Here we convert the source encoding used on AmpereOne
 * cores to the common (Neoverse, Cortex) encoding to avoid duplicating the
 * decoding code.
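 *
 * For example, an ARM_SPE_AMPEREONE_L1D source below is rewritten as
 * ARM_SPE_COMMON_DS_L1D and then passed through
 * arm_spe__synth_data_source_common() to fill in the perf_mem_data_src
 * fields.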
705 */ 706 static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, 707 union perf_mem_data_src *data_src) 708 { 709 struct arm_spe_record common_record; 710 711 switch (record->source) { 712 case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: 713 common_record.source = ARM_SPE_COMMON_DS_PEER_CORE; 714 break; 715 case ARM_SPE_AMPEREONE_SLC: 716 common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE; 717 break; 718 case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: 719 common_record.source = ARM_SPE_COMMON_DS_REMOTE; 720 break; 721 case ARM_SPE_AMPEREONE_DDR: 722 common_record.source = ARM_SPE_COMMON_DS_DRAM; 723 break; 724 case ARM_SPE_AMPEREONE_L1D: 725 common_record.source = ARM_SPE_COMMON_DS_L1D; 726 break; 727 case ARM_SPE_AMPEREONE_L2D: 728 common_record.source = ARM_SPE_COMMON_DS_L2; 729 break; 730 default: 731 pr_warning_once("AmpereOne: Unknown data source (0x%x)\n", 732 record->source); 733 return; 734 } 735 736 common_record.op = record->op; 737 arm_spe__synth_data_source_common(&common_record, data_src); 738 } 739 740 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, 741 union perf_mem_data_src *data_src) 742 { 743 /* Use common synthesis method to handle store operations */ 744 if (record->op & ARM_SPE_OP_ST) { 745 arm_spe__synth_data_source_common(record, data_src); 746 return; 747 } 748 749 switch (record->source) { 750 case ARM_SPE_HISI_HIP_PEER_CPU: 751 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 752 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 753 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 754 break; 755 case ARM_SPE_HISI_HIP_PEER_CPU_HITM: 756 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 757 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 758 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 759 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 760 break; 761 case ARM_SPE_HISI_HIP_L3: 762 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 763 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 764 data_src->mem_snoop = PERF_MEM_SNOOP_HIT; 765 break; 766 case ARM_SPE_HISI_HIP_L3_HITM: 767 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 768 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 769 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 770 break; 771 case ARM_SPE_HISI_HIP_PEER_CLUSTER: 772 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; 773 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 774 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 775 break; 776 case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: 777 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; 778 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 779 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 780 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 781 break; 782 case ARM_SPE_HISI_HIP_REMOTE_SOCKET: 783 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; 784 data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; 785 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; 786 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 787 break; 788 case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: 789 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; 790 data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; 791 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 792 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; 793 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 794 break; 795 case ARM_SPE_HISI_HIP_LOCAL_MEM: 796 data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; 797 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; 798 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 799 break; 800 case ARM_SPE_HISI_HIP_REMOTE_MEM: 801 
data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; 802 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; 803 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; 804 break; 805 case ARM_SPE_HISI_HIP_NC_DEV: 806 data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; 807 data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; 808 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 809 break; 810 case ARM_SPE_HISI_HIP_L2: 811 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 812 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 813 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 814 break; 815 case ARM_SPE_HISI_HIP_L2_HITM: 816 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 817 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 818 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 819 break; 820 case ARM_SPE_HISI_HIP_L1: 821 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; 822 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; 823 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 824 break; 825 default: 826 break; 827 } 828 } 829 830 static const struct data_source_handle data_source_handles[] = { 831 DS(common_ds_encoding_cpus, data_source_common), 832 DS(ampereone_ds_encoding_cpus, data_source_ampereone), 833 DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), 834 }; 835 836 static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record, 837 union perf_mem_data_src *data_src) 838 { 839 /* 840 * To find a cache hit, search in ascending order from the lower level 841 * caches to the higher level caches. This reflects the best scenario 842 * for a cache hit. 843 */ 844 if (arm_spe_is_cache_hit(record->type, L1D)) { 845 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; 846 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; 847 } else if (record->type & ARM_SPE_RECENTLY_FETCHED) { 848 data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; 849 data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB; 850 } else if (arm_spe_is_cache_hit(record->type, L2D)) { 851 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 852 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 853 } else if (arm_spe_is_cache_hit(record->type, LLC)) { 854 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 855 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 856 /* 857 * To find a cache miss, search in descending order from the higher 858 * level cache to the lower level cache. This represents the worst 859 * scenario for a cache miss. 860 */ 861 } else if (arm_spe_is_cache_miss(record->type, LLC)) { 862 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS; 863 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 864 } else if (arm_spe_is_cache_miss(record->type, L2D)) { 865 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS; 866 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 867 } else if (arm_spe_is_cache_miss(record->type, L1D)) { 868 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; 869 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; 870 } 871 } 872 873 static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record, 874 union perf_mem_data_src *data_src) 875 { 876 /* Record the greatest level info for a store operation. */ 877 if (arm_spe_is_cache_level(record->type, LLC)) { 878 data_src->mem_lvl = PERF_MEM_LVL_L3; 879 data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ? 880 PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; 881 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 882 } else if (arm_spe_is_cache_level(record->type, L2D)) { 883 data_src->mem_lvl = PERF_MEM_LVL_L2; 884 data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ? 
					     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_level(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
					     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
					const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;

	/*
	 * The data source packet carries more detailed cache level info for
	 * peer snooping, so respect the memory level if it has already been
	 * set by the data source parsing.
	 */
	if (!data_src->mem_lvl) {
		if (data_src->mem_op == PERF_MEM_OP_LOAD)
			arm_spe__synth_ld_memory_level(record, data_src);
		if (data_src->mem_op == PERF_MEM_OP_STORE)
			arm_spe__synth_st_memory_level(record, data_src);
	}

	if (!data_src->mem_lvl) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
	}

	/*
	 * If 'mem_snoop' has already been set by the data source packet,
	 * don't override it here.
	 */
	if (!data_src->mem_snoop) {
		if (record->type & ARM_SPE_DATA_SNOOPED) {
			if (record->type & ARM_SPE_HITM)
				data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		} else {
			u64 *metadata =
				arm_spe__get_metadata_by_cpu(spe, speq->cpu);

			/*
			 * Set NA ("Not available") mode if there is no
			 * metadata or the SNOOPED event is not supported.
			 */
			if (!metadata ||
			    !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
				data_src->mem_snoop = PERF_MEM_SNOOP_NA;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		}
	}

	if (!data_src->mem_remote) {
		if (record->type & ARM_SPE_REMOTE_ACCESS)
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
	}
}

static void arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(perf_session__env(spe->session));
		midr = strtol(cpuid, NULL, 16);
	} else {
		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		if (!metadata)
			return;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
			return data_source_handles[i].ds_synth(record, data_src);
		}
	}

	return;
}

static union perf_mem_data_src
arm_spe__synth_data_source(struct arm_spe_queue *speq,
			   const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = {};

	/* Only synthesize data source for LDST operations */
	if (!is_ldst_op(record->op))
		return data_src;

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return data_src;

	arm_spe__synth_ds(speq, record, &data_src);
	arm_spe__synth_memory_level(speq, record, &data_src);

	if
(record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { 1004 data_src.mem_dtlb = PERF_MEM_TLB_WK; 1005 1006 if (record->type & ARM_SPE_TLB_MISS) 1007 data_src.mem_dtlb |= PERF_MEM_TLB_MISS; 1008 else 1009 data_src.mem_dtlb |= PERF_MEM_TLB_HIT; 1010 } 1011 1012 return data_src; 1013 } 1014 1015 static int arm_spe_sample(struct arm_spe_queue *speq) 1016 { 1017 const struct arm_spe_record *record = &speq->decoder->record; 1018 struct arm_spe *spe = speq->spe; 1019 union perf_mem_data_src data_src; 1020 int err; 1021 1022 /* 1023 * Discard all samples until period is reached 1024 */ 1025 speq->sample_count++; 1026 if (speq->sample_count < spe->synth_opts.period) 1027 return 0; 1028 speq->sample_count = 0; 1029 1030 arm_spe__sample_flags(speq); 1031 data_src = arm_spe__synth_data_source(speq, record); 1032 1033 if (spe->sample_flc) { 1034 if (record->type & ARM_SPE_L1D_MISS) { 1035 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, 1036 data_src); 1037 if (err) 1038 return err; 1039 } 1040 1041 if (record->type & ARM_SPE_L1D_ACCESS) { 1042 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, 1043 data_src); 1044 if (err) 1045 return err; 1046 } 1047 } 1048 1049 if (spe->sample_llc) { 1050 if (record->type & ARM_SPE_LLC_MISS) { 1051 err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id, 1052 data_src); 1053 if (err) 1054 return err; 1055 } 1056 1057 if (record->type & ARM_SPE_LLC_ACCESS) { 1058 err = arm_spe__synth_mem_sample(speq, spe->llc_access_id, 1059 data_src); 1060 if (err) 1061 return err; 1062 } 1063 } 1064 1065 if (spe->sample_tlb) { 1066 if (record->type & ARM_SPE_TLB_MISS) { 1067 err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id, 1068 data_src); 1069 if (err) 1070 return err; 1071 } 1072 1073 if (record->type & ARM_SPE_TLB_ACCESS) { 1074 err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id, 1075 data_src); 1076 if (err) 1077 return err; 1078 } 1079 } 1080 1081 if (spe->synth_opts.last_branch && 1082 (spe->sample_branch || spe->sample_instructions)) 1083 arm_spe__prep_branch_stack(speq); 1084 1085 if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { 1086 err = arm_spe__synth_branch_sample(speq, spe->branch_id); 1087 if (err) 1088 return err; 1089 } 1090 1091 if (spe->sample_remote_access && 1092 (record->type & ARM_SPE_REMOTE_ACCESS)) { 1093 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id, 1094 data_src); 1095 if (err) 1096 return err; 1097 } 1098 1099 /* 1100 * When data_src is zero it means the record is not a memory operation, 1101 * skip to synthesize memory sample for this case. 1102 */ 1103 if (spe->sample_memory && is_ldst_op(record->op)) { 1104 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); 1105 if (err) 1106 return err; 1107 } 1108 1109 if (spe->sample_instructions) { 1110 err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); 1111 if (err) 1112 return err; 1113 } 1114 1115 return 0; 1116 } 1117 1118 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) 1119 { 1120 struct arm_spe *spe = speq->spe; 1121 struct arm_spe_record *record; 1122 int ret; 1123 1124 if (!spe->kernel_start) 1125 spe->kernel_start = machine__kernel_start(spe->machine); 1126 1127 while (1) { 1128 /* 1129 * The usual logic is firstly to decode the packets, and then 1130 * based the record to synthesize sample; but here the flow is 1131 * reversed: it calls arm_spe_sample() for synthesizing samples 1132 * prior to arm_spe_decode(). 1133 * 1134 * Two reasons for this code logic: 1135 * 1. 
		 *    When a queue is set up in arm_spe__setup_queue(), the
		 *    decoder has already run and produced a record, but no
		 *    sample was synthesized for it at that point; that
		 *    leftover record is turned into a sample here.
		 * 2. After decoding trace data, the record timestamp must be
		 *    compared with the timestamp of the incoming perf event.
		 *    If the record is later, bail out and push the record
		 *    onto the auxtrace heap, so that synthesizing its sample
		 *    is deferred to the next call; this keeps samples from
		 *    the Arm SPE trace data and other perf events correlated
		 *    in correct time order.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error was detected while decoding the SPE trace data,
		 * continue with the next chunk of trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the incoming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool
timeless_decoding = true; 1273 1274 /* 1275 * Circle through the list of event and complain if we find one 1276 * with the time bit set. 1277 */ 1278 evlist__for_each_entry(evlist, evsel) { 1279 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 1280 timeless_decoding = false; 1281 } 1282 1283 return timeless_decoding; 1284 } 1285 1286 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 1287 { 1288 unsigned int queue_nr; 1289 u64 ts; 1290 int ret; 1291 1292 while (1) { 1293 struct auxtrace_queue *queue; 1294 struct arm_spe_queue *speq; 1295 1296 if (!spe->heap.heap_cnt) 1297 return 0; 1298 1299 if (spe->heap.heap_array[0].ordinal >= timestamp) 1300 return 0; 1301 1302 queue_nr = spe->heap.heap_array[0].queue_nr; 1303 queue = &spe->queues.queue_array[queue_nr]; 1304 speq = queue->priv; 1305 1306 auxtrace_heap__pop(&spe->heap); 1307 1308 if (spe->heap.heap_cnt) { 1309 ts = spe->heap.heap_array[0].ordinal + 1; 1310 if (ts > timestamp) 1311 ts = timestamp; 1312 } else { 1313 ts = timestamp; 1314 } 1315 1316 /* 1317 * A previous context-switch event has set pid/tid in the machine's context, so 1318 * here we need to update the pid/tid in the thread and SPE queue. 1319 */ 1320 if (!spe->use_ctx_pkt_for_pid) 1321 arm_spe_set_pid_tid_cpu(spe, queue); 1322 1323 ret = arm_spe_run_decoder(speq, &ts); 1324 if (ret < 0) { 1325 auxtrace_heap__add(&spe->heap, queue_nr, ts); 1326 return ret; 1327 } 1328 1329 if (!ret) { 1330 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 1331 if (ret < 0) 1332 return ret; 1333 } else { 1334 speq->on_heap = false; 1335 } 1336 } 1337 1338 return 0; 1339 } 1340 1341 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 1342 u64 time_) 1343 { 1344 struct auxtrace_queues *queues = &spe->queues; 1345 unsigned int i; 1346 u64 ts = 0; 1347 1348 for (i = 0; i < queues->nr_queues; i++) { 1349 struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 1350 struct arm_spe_queue *speq = queue->priv; 1351 1352 if (speq && (tid == -1 || speq->tid == tid)) { 1353 speq->time = time_; 1354 arm_spe_set_pid_tid_cpu(spe, queue); 1355 arm_spe_run_decoder(speq, &ts); 1356 } 1357 } 1358 return 0; 1359 } 1360 1361 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, 1362 struct perf_sample *sample) 1363 { 1364 pid_t pid, tid; 1365 int cpu; 1366 1367 if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) 1368 return 0; 1369 1370 pid = event->context_switch.next_prev_pid; 1371 tid = event->context_switch.next_prev_tid; 1372 cpu = sample->cpu; 1373 1374 if (tid == -1) 1375 pr_warning("context_switch event has no tid\n"); 1376 1377 return machine__set_current_tid(spe->machine, cpu, pid, tid); 1378 } 1379 1380 static int arm_spe_process_event(struct perf_session *session, 1381 union perf_event *event, 1382 struct perf_sample *sample, 1383 const struct perf_tool *tool) 1384 { 1385 int err = 0; 1386 u64 timestamp; 1387 struct arm_spe *spe = container_of(session->auxtrace, 1388 struct arm_spe, auxtrace); 1389 1390 if (dump_trace) 1391 return 0; 1392 1393 if (!tool->ordered_events) { 1394 pr_err("SPE trace requires ordered events\n"); 1395 return -EINVAL; 1396 } 1397 1398 if (sample->time && (sample->time != (u64) -1)) 1399 timestamp = perf_time_to_tsc(sample->time, &spe->tc); 1400 else 1401 timestamp = 0; 1402 1403 if (timestamp || spe->timeless_decoding) { 1404 err = arm_spe__update_queues(spe); 1405 if (err) 1406 return err; 1407 } 1408 1409 if (spe->timeless_decoding) { 1410 if (event->header.type == PERF_RECORD_EXIT) { 1411 
err = arm_spe_process_timeless_queues(spe, 1412 event->fork.tid, 1413 sample->time); 1414 } 1415 } else if (timestamp) { 1416 err = arm_spe_process_queues(spe, timestamp); 1417 if (err) 1418 return err; 1419 1420 if (!spe->use_ctx_pkt_for_pid && 1421 (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || 1422 event->header.type == PERF_RECORD_SWITCH)) 1423 err = arm_spe_context_switch(spe, event, sample); 1424 } 1425 1426 return err; 1427 } 1428 1429 static int arm_spe_process_auxtrace_event(struct perf_session *session, 1430 union perf_event *event, 1431 const struct perf_tool *tool __maybe_unused) 1432 { 1433 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1434 auxtrace); 1435 1436 if (!spe->data_queued) { 1437 struct auxtrace_buffer *buffer; 1438 off_t data_offset; 1439 int fd = perf_data__fd(session->data); 1440 int err; 1441 1442 if (perf_data__is_pipe(session->data)) { 1443 data_offset = 0; 1444 } else { 1445 data_offset = lseek(fd, 0, SEEK_CUR); 1446 if (data_offset == -1) 1447 return -errno; 1448 } 1449 1450 err = auxtrace_queues__add_event(&spe->queues, session, event, 1451 data_offset, &buffer); 1452 if (err) 1453 return err; 1454 1455 /* Dump here now we have copied a piped trace out of the pipe */ 1456 if (dump_trace) { 1457 if (auxtrace_buffer__get_data(buffer, fd)) { 1458 arm_spe_dump_event(spe, buffer->data, 1459 buffer->size); 1460 auxtrace_buffer__put_data(buffer); 1461 } 1462 } 1463 } 1464 1465 return 0; 1466 } 1467 1468 static int arm_spe_flush(struct perf_session *session __maybe_unused, 1469 const struct perf_tool *tool __maybe_unused) 1470 { 1471 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1472 auxtrace); 1473 int ret; 1474 1475 if (dump_trace) 1476 return 0; 1477 1478 if (!tool->ordered_events) 1479 return -EINVAL; 1480 1481 ret = arm_spe__update_queues(spe); 1482 if (ret < 0) 1483 return ret; 1484 1485 if (spe->timeless_decoding) 1486 return arm_spe_process_timeless_queues(spe, -1, 1487 MAX_TIMESTAMP - 1); 1488 1489 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 1490 if (ret) 1491 return ret; 1492 1493 if (!spe->use_ctx_pkt_for_pid) 1494 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 1495 "Matching of TIDs to SPE events could be inaccurate.\n"); 1496 1497 return 0; 1498 } 1499 1500 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) 1501 { 1502 u64 *metadata; 1503 1504 metadata = zalloc(per_cpu_size); 1505 if (!metadata) 1506 return NULL; 1507 1508 memcpy(metadata, buf, per_cpu_size); 1509 return metadata; 1510 } 1511 1512 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) 1513 { 1514 int i; 1515 1516 for (i = 0; i < nr_cpu; i++) 1517 zfree(&metadata[i]); 1518 free(metadata); 1519 } 1520 1521 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, 1522 u64 *ver, int *nr_cpu) 1523 { 1524 u64 *ptr = (u64 *)info->priv; 1525 u64 metadata_size; 1526 u64 **metadata = NULL; 1527 int hdr_sz, per_cpu_sz, i; 1528 1529 metadata_size = info->header.size - 1530 sizeof(struct perf_record_auxtrace_info); 1531 1532 /* Metadata version 1 */ 1533 if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { 1534 *ver = 1; 1535 *nr_cpu = 0; 1536 /* No per CPU metadata */ 1537 return NULL; 1538 } 1539 1540 *ver = ptr[ARM_SPE_HEADER_VERSION]; 1541 hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; 1542 *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; 1543 1544 metadata = calloc(*nr_cpu, sizeof(*metadata)); 1545 if (!metadata) 1546 return NULL; 1547 1548 /* Locate the start address of per CPU metadata */ 
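	/*
	 * Version 2+ layout, as parsed here: a header of 'hdr_sz' u64 words
	 * (ARM_SPE_HEADER_VERSION, ARM_SPE_HEADER_SIZE, ARM_SPE_PMU_TYPE_V2,
	 * ARM_SPE_CPUS_NUM) followed by 'nr_cpu' equally sized per-CPU
	 * blocks, each starting with ARM_SPE_MAGIC, ARM_SPE_CPU and
	 * ARM_SPE_CPU_NR_PARAMS.
	 */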
1549 ptr += hdr_sz; 1550 per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); 1551 1552 for (i = 0; i < *nr_cpu; i++) { 1553 metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); 1554 if (!metadata[i]) 1555 goto err_per_cpu_metadata; 1556 1557 ptr += per_cpu_sz / sizeof(u64); 1558 } 1559 1560 return metadata; 1561 1562 err_per_cpu_metadata: 1563 arm_spe__free_metadata(metadata, *nr_cpu); 1564 return NULL; 1565 } 1566 1567 static void arm_spe_free_queue(void *priv) 1568 { 1569 struct arm_spe_queue *speq = priv; 1570 1571 if (!speq) 1572 return; 1573 thread__zput(speq->thread); 1574 arm_spe_decoder_free(speq->decoder); 1575 zfree(&speq->event_buf); 1576 zfree(&speq->last_branch); 1577 free(speq); 1578 } 1579 1580 static void arm_spe_free_events(struct perf_session *session) 1581 { 1582 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1583 auxtrace); 1584 struct auxtrace_queues *queues = &spe->queues; 1585 unsigned int i; 1586 1587 for (i = 0; i < queues->nr_queues; i++) { 1588 arm_spe_free_queue(queues->queue_array[i].priv); 1589 queues->queue_array[i].priv = NULL; 1590 } 1591 auxtrace_queues__free(queues); 1592 } 1593 1594 static void arm_spe_free(struct perf_session *session) 1595 { 1596 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1597 auxtrace); 1598 1599 auxtrace_heap__free(&spe->heap); 1600 arm_spe_free_events(session); 1601 session->auxtrace = NULL; 1602 arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); 1603 free(spe); 1604 } 1605 1606 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, 1607 struct evsel *evsel) 1608 { 1609 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); 1610 1611 return evsel->core.attr.type == spe->pmu_type; 1612 } 1613 1614 static const char * const metadata_hdr_v1_fmts[] = { 1615 [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1616 [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", 1617 }; 1618 1619 static const char * const metadata_hdr_fmts[] = { 1620 [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", 1621 [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", 1622 [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", 1623 [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", 1624 }; 1625 1626 static const char * const metadata_per_cpu_fmts[] = { 1627 [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", 1628 [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", 1629 [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", 1630 [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", 1631 [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1632 [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", 1633 [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n", 1634 }; 1635 1636 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) 1637 { 1638 unsigned int i, cpu, hdr_size, cpu_num, cpu_size; 1639 const char * const *hdr_fmts; 1640 1641 if (!dump_trace) 1642 return; 1643 1644 if (spe->metadata_ver == 1) { 1645 cpu_num = 0; 1646 hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; 1647 hdr_fmts = metadata_hdr_v1_fmts; 1648 } else { 1649 cpu_num = arr[ARM_SPE_CPUS_NUM]; 1650 hdr_size = arr[ARM_SPE_HEADER_SIZE]; 1651 hdr_fmts = metadata_hdr_fmts; 1652 } 1653 1654 for (i = 0; i < hdr_size; i++) 1655 fprintf(stdout, hdr_fmts[i], arr[i]); 1656 1657 arr += hdr_size; 1658 for (cpu = 0; cpu < cpu_num; cpu++) { 1659 /* 1660 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS 1661 * are fixed. 
The sequential parameter size is decided by the 1662 * field 'ARM_SPE_CPU_NR_PARAMS'. 1663 */ 1664 cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; 1665 for (i = 0; i < cpu_size; i++) 1666 fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); 1667 arr += cpu_size; 1668 } 1669 } 1670 1671 static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 1672 const char *name) 1673 { 1674 struct evsel *evsel; 1675 1676 evlist__for_each_entry(evlist, evsel) { 1677 if (evsel->core.id && evsel->core.id[0] == id) { 1678 if (evsel->name) 1679 zfree(&evsel->name); 1680 evsel->name = strdup(name); 1681 break; 1682 } 1683 } 1684 } 1685 1686 static int 1687 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) 1688 { 1689 struct evlist *evlist = session->evlist; 1690 struct evsel *evsel; 1691 struct perf_event_attr attr; 1692 bool found = false; 1693 u64 id; 1694 int err; 1695 1696 evlist__for_each_entry(evlist, evsel) { 1697 if (evsel->core.attr.type == spe->pmu_type) { 1698 found = true; 1699 break; 1700 } 1701 } 1702 1703 if (!found) { 1704 pr_debug("No selected events with SPE trace data\n"); 1705 return 0; 1706 } 1707 1708 memset(&attr, 0, sizeof(struct perf_event_attr)); 1709 attr.size = sizeof(struct perf_event_attr); 1710 attr.type = PERF_TYPE_HARDWARE; 1711 attr.sample_type = evsel->core.attr.sample_type & 1712 (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); 1713 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1714 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | 1715 PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; 1716 if (spe->timeless_decoding) 1717 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1718 else 1719 attr.sample_type |= PERF_SAMPLE_TIME; 1720 1721 spe->sample_type = attr.sample_type; 1722 1723 attr.exclude_user = evsel->core.attr.exclude_user; 1724 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1725 attr.exclude_hv = evsel->core.attr.exclude_hv; 1726 attr.exclude_host = evsel->core.attr.exclude_host; 1727 attr.exclude_guest = evsel->core.attr.exclude_guest; 1728 attr.sample_id_all = evsel->core.attr.sample_id_all; 1729 attr.read_format = evsel->core.attr.read_format; 1730 attr.sample_period = spe->synth_opts.period; 1731 1732 /* create new id val to be a fixed offset from evsel id */ 1733 id = auxtrace_synth_id_range_start(evsel); 1734 1735 if (spe->synth_opts.flc) { 1736 spe->sample_flc = true; 1737 1738 /* Level 1 data cache miss */ 1739 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1740 if (err) 1741 return err; 1742 spe->l1d_miss_id = id; 1743 arm_spe_set_event_name(evlist, id, "l1d-miss"); 1744 id += 1; 1745 1746 /* Level 1 data cache access */ 1747 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1748 if (err) 1749 return err; 1750 spe->l1d_access_id = id; 1751 arm_spe_set_event_name(evlist, id, "l1d-access"); 1752 id += 1; 1753 } 1754 1755 if (spe->synth_opts.llc) { 1756 spe->sample_llc = true; 1757 1758 /* Last level cache miss */ 1759 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1760 if (err) 1761 return err; 1762 spe->llc_miss_id = id; 1763 arm_spe_set_event_name(evlist, id, "llc-miss"); 1764 id += 1; 1765 1766 /* Last level cache access */ 1767 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1768 if (err) 1769 return err; 1770 spe->llc_access_id = id; 1771 arm_spe_set_event_name(evlist, id, "llc-access"); 1772 id += 1; 1773 } 1774 1775 if (spe->synth_opts.tlb) { 1776 spe->sample_tlb = true; 1777 1778 /* TLB miss */ 1779 err = 
perf_session__deliver_synth_attr_event(session, &attr, id); 1780 if (err) 1781 return err; 1782 spe->tlb_miss_id = id; 1783 arm_spe_set_event_name(evlist, id, "tlb-miss"); 1784 id += 1; 1785 1786 /* TLB access */ 1787 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1788 if (err) 1789 return err; 1790 spe->tlb_access_id = id; 1791 arm_spe_set_event_name(evlist, id, "tlb-access"); 1792 id += 1; 1793 } 1794 1795 if (spe->synth_opts.last_branch) { 1796 if (spe->synth_opts.last_branch_sz > 2) 1797 pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n"); 1798 1799 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1800 /* 1801 * We don't use the hardware index, but the sample generation 1802 * code uses the new format branch_stack with this field, 1803 * so the event attributes must indicate that it's present. 1804 */ 1805 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1806 } 1807 1808 if (spe->synth_opts.branches) { 1809 spe->sample_branch = true; 1810 1811 /* Branch */ 1812 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1813 if (err) 1814 return err; 1815 spe->branch_id = id; 1816 arm_spe_set_event_name(evlist, id, "branch"); 1817 id += 1; 1818 } 1819 1820 if (spe->synth_opts.remote_access) { 1821 spe->sample_remote_access = true; 1822 1823 /* Remote access */ 1824 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1825 if (err) 1826 return err; 1827 spe->remote_access_id = id; 1828 arm_spe_set_event_name(evlist, id, "remote-access"); 1829 id += 1; 1830 } 1831 1832 if (spe->synth_opts.mem) { 1833 spe->sample_memory = true; 1834 1835 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1836 if (err) 1837 return err; 1838 spe->memory_id = id; 1839 arm_spe_set_event_name(evlist, id, "memory"); 1840 id += 1; 1841 } 1842 1843 if (spe->synth_opts.instructions) { 1844 spe->sample_instructions = true; 1845 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1846 1847 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1848 if (err) 1849 return err; 1850 spe->instructions_id = id; 1851 arm_spe_set_event_name(evlist, id, "instructions"); 1852 } 1853 1854 return 0; 1855 } 1856 1857 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) 1858 { 1859 u64 midr; 1860 int i; 1861 1862 if (!nr_cpu) 1863 return false; 1864 1865 for (i = 0; i < nr_cpu; i++) { 1866 if (!metadata[i]) 1867 return false; 1868 1869 if (i == 0) { 1870 midr = metadata[i][ARM_SPE_CPU_MIDR]; 1871 continue; 1872 } 1873 1874 if (midr != metadata[i][ARM_SPE_CPU_MIDR]) 1875 return false; 1876 } 1877 1878 return true; 1879 } 1880 1881 int arm_spe_process_auxtrace_info(union perf_event *event, 1882 struct perf_session *session) 1883 { 1884 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1885 size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; 1886 struct perf_record_time_conv *tc = &session->time_conv; 1887 struct arm_spe *spe; 1888 u64 **metadata = NULL; 1889 u64 metadata_ver; 1890 int nr_cpu, err; 1891 1892 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 1893 min_sz) 1894 return -EINVAL; 1895 1896 metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, 1897 &nr_cpu); 1898 if (!metadata && metadata_ver != 1) { 1899 pr_err("Failed to parse Arm SPE metadata.\n"); 1900 return -EINVAL; 1901 } 1902 1903 spe = zalloc(sizeof(struct arm_spe)); 1904 if (!spe) { 1905 err = -ENOMEM; 1906 goto err_free_metadata; 1907 } 1908 1909 err = auxtrace_queues__init(&spe->queues); 1910 if (err) 1911 goto err_free; 1912 
	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled and the hardware clock parameters are stored in the
	 * session context. Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used later to convert
	 * between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		spe->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&spe->synth_opts, false);
		/* Default nanoseconds period not supported */
		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
		spe->synth_opts.period = 1;
	}

	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
		ui__error("You must only use i (instructions) --itrace period with Arm SPE, e.g. --itrace=i1i\n");
		err = -EINVAL;
		goto err_free_queues;
	}
	if (spe->synth_opts.period > 1)
		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}
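
/*
 * Illustrative usage sketch (not part of the original source; the exact PMU
 * name and format attributes depend on the platform, e.g. "arm_spe_0"):
 *
 *   perf record -e arm_spe/ts_enable=1,load_filter=1,store_filter=1/ -a -- sleep 1
 *   perf report --itrace=i1i
 *
 * Depending on the --itrace options, the decoder above synthesizes events
 * named "l1d-miss", "l1d-access", "llc-miss", "llc-access", "tlb-miss",
 * "tlb-access", "branch", "remote-access", "memory" and "instructions".
 */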