// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"

#define MAX_TIMESTAMP (~0ULL)

#define is_ldst_op(op)		(!!((op) & ARM_SPE_OP_LDST))

#define is_simd_op(op)		(!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \
					    ARM_SPE_OP_SME | ARM_SPE_OP_ASE)))

#define is_mem_op(op)		(is_ldst_op(op) || is_simd_op(op))

#define ARM_SPE_CACHE_EVENT(lvl) \
	(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)

#define arm_spe_is_cache_level(type, lvl) \
	((type) & ARM_SPE_CACHE_EVENT(lvl))

#define arm_spe_is_cache_hit(type, lvl) \
	(((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)

#define arm_spe_is_cache_miss(type, lvl) \
	((type) & ARM_SPE_##lvl##_MISS)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;

	u64				**metadata;
	u64				metadata_ver;
	u64				metadata_nr_cpu;
	bool				is_homogeneous;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				sample_count;
	u32				flags;
	struct branch_stack		*last_branch;
};

struct data_source_handle {
	const struct midr_range *midr_ranges;
	void (*ds_synth)(const struct arm_spe_record *record,
			 union perf_mem_data_src *data_src);
};

#define DS(range, func)					\
	{						\
		.midr_ranges = range,			\
		.ds_synth = arm_spe__synth_##func,	\
	}
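
/*
 * Illustration (follows directly from the macro definition): the table
 * entry DS(common_ds_encoding_cpus, data_source_common) used further
 * below expands to
 *
 *	{
 *		.midr_ranges = common_ds_encoding_cpus,
 *		.ds_synth = arm_spe__synth_data_source_common,
 *	}
 *
 * i.e. each entry binds a list of CPU MIDR ranges to the data-source
 * decode routine those CPUs use.
 */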
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}
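
/*
 * Note on buffer lifetime (a reading of the logic above, not new
 * behaviour): the previous buffer is only dropped once a new, non-empty
 * buffer has been fetched, presumably so that any data the decoder still
 * references stays mapped across the hand-over; the new buffer then
 * becomes speq->old_buffer. Empty buffers are dropped immediately and
 * the function recurses to try the next one in the queue.
 */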
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	if (spe->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		/* Allocate up to two entries for PBT + TGT */
		sz += sizeof(struct branch_entry) *
		      min(spe->synth_opts.last_branch_sz, 2U);
		speq->last_branch = zalloc(sz);
		if (!speq->last_branch)
			goto out_free;
	}

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	/* CPU ID is -1 for per-thread mode */
	if (cpu < 0) {
		/*
		 * On a heterogeneous system, a CPU ID of -1 means we cannot
		 * tell which CPU generated the record, hence whether its
		 * data source packet is supported.
		 */
		if (!spe->is_homogeneous)
			return NULL;

		/* On a homogeneous system, simply use CPU0's metadata */
		return spe->metadata[0];
	}

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
			return spe->metadata[i];

	return NULL;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if (record->op & ARM_SPE_OP_SVE)
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = spe->synth_opts.period;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
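
/*
 * A short orientation note (summarising the logic below): an SPE record
 * contributes at most two branch-stack entries. "TGT" is the sampled
 * branch itself, described by from_ip/to_ip of the current record;
 * "PBT" is the previous branch target, an address-only hint carried in
 * record->prev_br_tgt for which no source, type or flags are known.
 */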
static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	struct branch_stack *bstack = speq->last_branch;
	struct branch_flags *bs_flags;
	unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
	bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
	bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
	size_t sz = sizeof(struct branch_stack) +
		    sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
	int i = 0;

	/* Clean up branch stack */
	memset(bstack, 0x0, sz);

	if (!have_tgt && !have_pbt)
		return;

	if (have_tgt) {
		bstack->entries[i].from = record->from_ip;
		bstack->entries[i].to = record->to_ip;

		bs_flags = &bstack->entries[i].flags;
		bs_flags->value = 0;

		if (record->op & ARM_SPE_OP_BR_CR_BL) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_CALL;
			else
				bs_flags->type |= PERF_BR_CALL;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * take this case as a function return.
		 */
		} else if (record->op & ARM_SPE_OP_BR_CR_RET ||
			   record->op & ARM_SPE_OP_BR_INDIRECT) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_RET;
			else
				bs_flags->type |= PERF_BR_RET;
		} else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNCOND;
		} else {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNKNOWN;
		}

		if (record->type & ARM_SPE_BRANCH_MISS) {
			bs_flags->mispred = 1;
			bs_flags->predicted = 0;
		} else {
			bs_flags->mispred = 0;
			bs_flags->predicted = 1;
		}

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			bs_flags->not_taken = 1;

		if (record->type & ARM_SPE_IN_TXN)
			bs_flags->in_tx = 1;

		bs_flags->cycles = min(record->latency, 0xFFFFU);
		i++;
	}

	if (have_pbt) {
		bs_flags = &bstack->entries[i].flags;
		bs_flags->type |= PERF_BR_UNKNOWN;
		bstack->entries[i].to = record->prev_br_tgt;
		i++;
	}

	bstack->nr = i;
	bstack->hw_idx = -1ULL;
}
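
/*
 * Worked example (derived from the code above): for a mispredicted,
 * conditional branch-and-link with a valid prev_br_tgt and
 * last_branch_sz >= 2, the resulting stack is
 *
 *	entries[0] = { .from = from_ip, .to = to_ip,
 *		       .flags = { .type = PERF_BR_COND_CALL, .mispred = 1 } }
 *	entries[1] = { .to = prev_br_tgt,
 *		       .flags = { .type = PERF_BR_UNKNOWN } }
 *	nr = 2, hw_idx = -1ULL
 */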
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id,
				     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id,
					     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
	MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS),
	{},
};

static const struct midr_range ampereone_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
	{},
};

static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
	{},
};
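
/*
 * The three MIDR range lists above feed the data_source_handles[] table
 * defined after the decode routines: the data source packet encoding is
 * IMPDEF, so arm_spe__synth_ds() picks the decode routine whose range
 * list matches the MIDR of the CPU that produced the record.
 */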
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;

	speq->flags = 0;
	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
		speq->flags = PERF_IP_FLAG_BRANCH;

		if (record->type & ARM_SPE_BRANCH_MISS)
			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			speq->flags |= PERF_IP_FLAG_NOT_TAKEN;

		if (record->type & ARM_SPE_IN_TXN)
			speq->flags |= PERF_IP_FLAG_IN_TX;

		if (record->op & ARM_SPE_OP_BR_COND)
			speq->flags |= PERF_IP_FLAG_CONDITIONAL;

		if (record->op & ARM_SPE_OP_BR_CR_BL)
			speq->flags |= PERF_IP_FLAG_CALL;
		else if (record->op & ARM_SPE_OP_BR_CR_RET)
			speq->flags |= PERF_IP_FLAG_RETURN;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * take it as a function return.
		 */
		else if (record->op & ARM_SPE_OP_BR_INDIRECT)
			speq->flags |= PERF_IP_FLAG_RETURN;
	}
}

static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three
	 * levels, so for the time being we assume three exist. If a
	 * production system is built with four, then this function would
	 * have to be changed to detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1 or L2, but we do know it was a
	 * cache-to-cache transfer, so set SNOOPX_PEER.
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3.
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except that it came from the
	 * other socket.
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}
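
/*
 * Summary of the mapping above (loads only; stores are always NA):
 *
 *	source			lvl		snoop
 *	L1D			L1 HIT		NONE
 *	L2			L2 HIT		NONE
 *	PEER_CORE		L2 HIT		SNOOPX_PEER
 *	LOCAL/PEER_CLUSTER	L3 HIT		SNOOPX_PEER
 *	SYS_CACHE		L3 HIT		HIT
 *	REMOTE			NA + remote	SNOOPX_PEER
 *	DRAM			LOC_RAM HIT	NONE
 */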
/*
 * The source field is IMPDEF. Here we convert the source encoding used on
 * AmpereOne cores to the common one (Neoverse, Cortex) to avoid duplicating
 * the decoding code.
 */
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
						 union perf_mem_data_src *data_src)
{
	struct arm_spe_record common_record;

	switch (record->source) {
	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
		break;
	case ARM_SPE_AMPEREONE_SLC:
		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
		break;
	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
		break;
	case ARM_SPE_AMPEREONE_DDR:
		common_record.source = ARM_SPE_COMMON_DS_DRAM;
		break;
	case ARM_SPE_AMPEREONE_L1D:
		common_record.source = ARM_SPE_COMMON_DS_L1D;
		break;
	case ARM_SPE_AMPEREONE_L2D:
		common_record.source = ARM_SPE_COMMON_DS_L2;
		break;
	default:
		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
				record->source);
		return;
	}

	common_record.op = record->op;
	arm_spe__synth_data_source_common(&common_record, data_src);
}
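
/*
 * Note that only .source and .op are copied into the temporary
 * common_record above; that is sufficient because
 * arm_spe__synth_data_source_common() reads no other record fields.
 */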
static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
						union perf_mem_data_src *data_src)
{
	/* Use the common synthesis method to handle store operations */
	if (record->op & ARM_SPE_OP_ST) {
		arm_spe__synth_data_source_common(record, data_src);
		return;
	}

	switch (record->source) {
	case ARM_SPE_HISI_HIP_PEER_CPU:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_L3:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	case ARM_SPE_HISI_HIP_L3_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_LOCAL_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		break;
	case ARM_SPE_HISI_HIP_NC_DEV:
		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_L1:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};

static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/*
	 * To find a cache hit, search in ascending order from the lower level
	 * caches to the higher level caches. This reflects the best scenario
	 * for a cache hit.
	 */
	if (arm_spe_is_cache_hit(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	} else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
	} else if (arm_spe_is_cache_hit(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_hit(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	/*
	 * To find a cache miss, search in descending order from the higher
	 * level cache to the lower level cache. This represents the worst
	 * scenario for a cache miss.
	 */
	} else if (arm_spe_is_cache_miss(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_miss(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_miss(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}
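
/*
 * Worked example of the scan order above: a load with
 * type = L1D_ACCESS | L1D_MISS | L2D_ACCESS | L2D_MISS | LLC_ACCESS
 * (missed L1 and L2, hit the LLC). The hit scan skips L1D and L2D since
 * their MISS bits are set, then matches arm_spe_is_cache_hit(type, LLC),
 * so the level resolves to PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT.
 */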
static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/* Record the highest cache level reached by a store operation. */
	if (arm_spe_is_cache_level(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_level(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_level(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
					const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;

	/*
	 * The data source packet contains more info for cache levels for
	 * peer snooping, so respect the memory level if it has already been
	 * set by data source parsing.
	 */
	if (!data_src->mem_lvl) {
		if (data_src->mem_op == PERF_MEM_OP_LOAD)
			arm_spe__synth_ld_memory_level(record, data_src);
		if (data_src->mem_op == PERF_MEM_OP_STORE)
			arm_spe__synth_st_memory_level(record, data_src);
	}

	if (!data_src->mem_lvl) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
	}

	/*
	 * If 'mem_snoop' has been set by the data source packet, skip
	 * setting it here.
	 */
	if (!data_src->mem_snoop) {
		if (record->type & ARM_SPE_DATA_SNOOPED) {
			if (record->type & ARM_SPE_HITM)
				data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		} else {
			u64 *metadata =
				arm_spe__get_metadata_by_cpu(spe, speq->cpu);

			/*
			 * Set NA ("Not available") mode if there is no
			 * metadata or the SNOOPED event is not supported.
			 */
			if (!metadata ||
			    !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
				data_src->mem_snoop = PERF_MEM_SNOOP_NA;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		}
	}

	if (!data_src->mem_remote) {
		if (record->type & ARM_SPE_REMOTE_ACCESS)
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
	}
}

static void arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(perf_session__env(spe->session));
		midr = strtol(cpuid, NULL, 16);
	} else {
		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		if (!metadata)
			return;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges))
			return data_source_handles[i].ds_synth(record, data_src);
	}
}

static union perf_mem_data_src
arm_spe__synth_data_source(struct arm_spe_queue *speq,
			   const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = {};

	if (!is_mem_op(record->op))
		return data_src;

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		data_src.mem_op = PERF_MEM_OP_NA;

	arm_spe__synth_ds(speq, record, &data_src);
	arm_spe__synth_memory_level(speq, record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src;
}
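
/*
 * End-to-end example (assuming no data source packet was decoded): a
 * load with type = L1D_ACCESS | L1D_MISS | LLC_ACCESS | LLC_MISS |
 * TLB_ACCESS | TLB_MISS yields mem_op = LOAD, the miss scan resolves to
 * PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS, and the dTLB field becomes
 * PERF_MEM_TLB_WK | PERF_MEM_TLB_MISS. The packed data_src.val is what
 * lands in sample.data_src for the synthesized memory samples.
 */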
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	union perf_mem_data_src data_src;
	int err;

	/*
	 * Discard all samples until period is reached
	 */
	speq->sample_count++;
	if (speq->sample_count < spe->synth_opts.period)
		return 0;
	speq->sample_count = 0;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->synth_opts.last_branch &&
	    (spe->sample_branch || spe->sample_instructions))
		arm_spe__prep_branch_stack(speq);

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && is_mem_op(record->op)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
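
/*
 * Note on the period check above: sample_count counts decoded SPE
 * records, so an --itrace period of n keeps one record in n. This is
 * downsampling of hardware samples, not a period measured in
 * instructions; the warning emitted from arm_spe_process_auxtrace_info()
 * spells out the same caveat.
 */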
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is first to decode the packets and then
		 * synthesize a sample based on the record; but here the flow
		 * is reversed: arm_spe_sample() is called to synthesize a
		 * sample prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. When the queue was set up in arm_spe__setup_queue(), it
		 * already decoded trace data and generated a record, but that
		 * record was left without a sample until the flow reaches
		 * here, so it's correct to synthesize a sample for the
		 * leftover record first.
		 * 2. After decoding trace data, the record timestamp must be
		 * compared with that of the coming perf event; if the record
		 * timestamp is later, we bail out and push the record into
		 * the auxtrace heap, deferring its sample until the next time
		 * this function runs. This correlates samples between Arm SPE
		 * trace data and other perf events with correct time
		 * ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding SPE trace data,
		 * continue with the next chunk of trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can be
		 * processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}
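
/*
 * A note on the two decoding modes handled below: when any event in the
 * session carries PERF_SAMPLE_TIME, queues are interleaved by timestamp
 * through the auxtrace heap (arm_spe_process_queues()). Otherwise
 * "timeless" decoding is used and each queue is drained in one go, at
 * PERF_RECORD_EXIT or at flush time (arm_spe_process_timeless_queues()).
 */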
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and check whether any of them
	 * has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}
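
/*
 * Only switch-out events are consumed above: per the perf context-switch
 * event ABI, next_prev_pid/tid on a PERF_RECORD_MISC_SWITCH_OUT event
 * describe the task being switched in, which is exactly what the
 * machine's per-CPU tid context should track next.
 */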
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
							      event->fork.tid,
							      sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here, now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
{
	u64 *metadata;

	metadata = zalloc(per_cpu_size);
	if (!metadata)
		return NULL;

	memcpy(metadata, buf, per_cpu_size);
	return metadata;
}

static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
{
	int i;

	for (i = 0; i < nr_cpu; i++)
		zfree(&metadata[i]);
	free(metadata);
}
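
/*
 * Layout handled by arm_spe__alloc_metadata() below (metadata version 2
 * and later), reconstructed from the parsing code: the auxtrace_info
 * private area starts with a header of ARM_SPE_HEADER_SIZE u64 words
 * (version, header size, PMU type, CPU count), followed by one
 * equally-sized block per CPU of
 * (priv_size - hdr_sz * sizeof(u64)) / nr_cpu bytes each.
 */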
static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
				     u64 *ver, int *nr_cpu)
{
	u64 *ptr = (u64 *)info->priv;
	u64 metadata_size;
	u64 **metadata = NULL;
	int hdr_sz, per_cpu_sz, i;

	metadata_size = info->header.size -
		sizeof(struct perf_record_auxtrace_info);

	/* Metadata version 1 */
	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
		*ver = 1;
		*nr_cpu = 0;
		/* No per CPU metadata */
		return NULL;
	}

	*ver = ptr[ARM_SPE_HEADER_VERSION];
	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];

	metadata = calloc(*nr_cpu, sizeof(*metadata));
	if (!metadata)
		return NULL;

	/* Locate the start address of per CPU metadata */
	ptr += hdr_sz;
	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
};

static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
};

static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
	[ARM_SPE_CAP_EVENT_FILTER]	= "    Event Filter     :0x%"PRIx64"\n",
};
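
/*
 * Example of the per-CPU sizing used in arm_spe_print_info() below: the
 * fixed part runs from ARM_SPE_MAGIC through ARM_SPE_CPU_NR_PARAMS
 * (ARM_SPE_CPU_NR_PARAMS + 1 words), and the value stored at
 * ARM_SPE_CPU_NR_PARAMS gives the count of the words that follow. With
 * the four parameters listed above (MIDR, PMU type, min interval, event
 * filter), each per-CPU block is seven u64 words.
 */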
static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The number of parameters that follow is given
		 * by the field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
			   (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;
	attr.sample_period = spe->synth_opts.period;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);
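
	/*
	 * From here on, each enabled sample class follows the same
	 * pattern: deliver one synthetic attr event with the next
	 * consecutive id, remember that id for the decoder, and give the
	 * event a readable name (e.g. "l1d-miss") in the evlist.
	 */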
	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.last_branch) {
		if (spe->synth_opts.last_branch_sz > 2)
			pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");

		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_id = id;
		arm_spe_set_event_name(evlist, id, "branch");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}

	return 0;
}

static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
	u64 midr;
	int i;

	if (!nr_cpu)
		return false;

	for (i = 0; i < nr_cpu; i++) {
		if (!metadata[i])
			return false;

		if (i == 0) {
			midr = metadata[i][ARM_SPE_CPU_MIDR];
			continue;
		}

		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
			return false;
	}

	return true;
}
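
/*
 * Homogeneity matters mainly for per-thread traces: there the CPU ID in
 * a queue is -1, and arm_spe__get_metadata_by_cpu() can only fall back
 * to CPU0's metadata (MIDR, capabilities) when every CPU reported the
 * same MIDR.
 */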
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	u64 **metadata = NULL;
	u64 metadata_ver;
	int nr_cpu, err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					 min_sz)
		return -EINVAL;

	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
					   &nr_cpu);
	if (!metadata && metadata_ver != 1) {
		pr_err("Failed to parse Arm SPE metadata.\n");
		return -EINVAL;
	}

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled, and the parameters for the hardware clock are stored in
	 * the session context. Pass these parameters to struct
	 * perf_tsc_conversion in "spe->tc", which is used later for
	 * conversion between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		spe->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&spe->synth_opts, false);
		/* Default nanoseconds period not supported */
		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
		spe->synth_opts.period = 1;
	}

	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
		ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n");
		err = -EINVAL;
		goto err_free_queues;
	}
	if (spe->synth_opts.period > 1)
		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}