// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)

/* True when the operation type carries the load/store class bit */
#define is_ldst_op(op)		(!!((op) & ARM_SPE_OP_LDST))

/* Combined access|miss event mask for a given cache level (L1D/L2D/LLC) */
#define ARM_SPE_CACHE_EVENT(lvl) \
	(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)

/* Any event (access or miss) was recorded for this cache level */
#define arm_spe_is_cache_level(type, lvl) \
	((type) & ARM_SPE_CACHE_EVENT(lvl))

/* Access recorded without a miss => a hit at this cache level */
#define arm_spe_is_cache_hit(type, lvl) \
	(((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)

/* Miss recorded at this cache level */
#define arm_spe_is_cache_miss(type, lvl) \
	((type) & ARM_SPE_##lvl##_MISS)

/* Per-session decode state for one Arm SPE AUX trace stream. */
struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	/* Which synthetic sample classes were requested (itrace options) */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;

	/* perf event IDs used when delivering each synthetic sample class */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;

	/* Per-CPU metadata table recorded in the perf.data header */
	u64				**metadata;
	u64				metadata_ver;
	u64				metadata_nr_cpu;
	bool				is_homogeneous;
};

/* Per-queue (per-CPU or per-thread) decode state. */
struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				sample_count;
	u32				flags;
	struct branch_stack		*last_branch;
};

/* Maps a MIDR range list to its vendor-specific data-source synthesizer. */
struct data_source_handle {
	const struct midr_range *midr_ranges;
	void (*ds_synth)(const struct arm_spe_record *record,
			 union perf_mem_data_src *data_src);
};

#define DS(range, func)					\
	{						\
		.midr_ranges = range,			\
		.ds_synth = arm_spe__synth_##func,	\
	}

/* Hex-dump raw SPE trace bytes with decoded packet descriptions. */
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1; /* skip one byte on decode error to resync */
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

/* Print a separator then dump the raw event payload. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

/*
 * Decoder callback: hand the next AUX buffer's data to the packet decoder.
 * Manages the buffer lifecycle: the previous buffer is only dropped once a
 * new non-empty buffer has been obtained; empty buffers recurse to the next.
 */
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

/* Allocate and initialize one decode queue (returns NULL on failure). */
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	if (spe->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		/* Allocate up to two entries for PBT + TGT */
		sz += sizeof(struct branch_entry) *
		      min(spe->synth_opts.last_branch_sz, 2U);
		speq->last_branch = zalloc(sz);
		if (!speq->last_branch)
			goto out_free;
	}

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);

	return NULL;
}

/* Classify an IP as kernel or user by comparing against kernel_start. */
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

/* Refresh the queue's pid/tid/cpu from the machine's current-tid tracking. */
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		/* tid changed: drop the cached thread so it is re-resolved */
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

/* Record a context-packet tid for this CPU, then refresh queue identity. */
static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

/*
 * Look up the metadata array for a given CPU. Returns NULL when no metadata
 * exists or when per-thread mode (cpu == -1) runs on a heterogeneous system,
 * where no single CPU's metadata can be assumed to apply.
 */
static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	/* CPU ID is -1 for per-thread mode */
	if (cpu < 0) {
		/*
		 * On the heterogeneous system, due to CPU ID is -1,
		 * cannot confirm the data source packet is supported.
		 */
		if (!spe->is_homogeneous)
			return NULL;

		/* In homogeneous system, simply use CPU0's metadata */
		return spe->metadata[0];
	}

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
			return spe->metadata[i];

	return NULL;
}

/* Derive SVE SIMD flags (arch + predication) from a decoded record. */
static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

/* Fill the common fields of a synthesized perf sample from the record. */
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = spe->synth_opts.period;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

/*
 * Build a (max two entry) branch stack for the record: entry 0 is the
 * current branch (TGT), entry 1 the previous branch target (PBT).
 */
static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	struct branch_stack *bstack = speq->last_branch;
	struct branch_flags *bs_flags;
	unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
	bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
	bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
	size_t sz = sizeof(struct branch_stack) +
		    sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
	int i = 0;

	/* Clean up branch stack */
	memset(bstack, 0x0, sz);

	if (!have_tgt && !have_pbt)
		return;

	if (have_tgt) {
		bstack->entries[i].from = record->from_ip;
		bstack->entries[i].to = record->to_ip;

		bs_flags = &bstack->entries[i].flags;
		bs_flags->value = 0;

		if (record->op & ARM_SPE_OP_BR_CR_BL) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_CALL;
			else
				bs_flags->type |= PERF_BR_CALL;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * take this case as function return.
		 */
		} else if (record->op & ARM_SPE_OP_BR_CR_RET ||
			   record->op & ARM_SPE_OP_BR_INDIRECT) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_RET;
			else
				bs_flags->type |= PERF_BR_RET;
		} else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNCOND;
		} else {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNKNOWN;
		}

		if (record->type & ARM_SPE_BRANCH_MISS) {
			bs_flags->mispred = 1;
			bs_flags->predicted = 0;
		} else {
			bs_flags->mispred = 0;
			bs_flags->predicted = 1;
		}

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			bs_flags->not_taken = 1;

		if (record->type & ARM_SPE_IN_TXN)
			bs_flags->in_tx = 1;

		/* cycles field is 16-bit wide; clamp the recorded latency */
		bs_flags->cycles = min(record->latency, 0xFFFFU);
		i++;
	}

	if (have_pbt) {
		bs_flags = &bstack->entries[i].flags;
		bs_flags->type |= PERF_BR_UNKNOWN;
		bstack->entries[i].to = record->prev_br_tgt;
		i++;
	}

	bstack->nr = i;
	bstack->hw_idx = -1ULL;
}

/* Re-serialize the sample into the event when --itrace inject is used. */
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

/* Deliver a synthesized event to the session, optionally injecting first. */
static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

/* Synthesize one memory-access sample (addresses, data source, latency). */
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id,
				     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

/* Synthesize one branch sample (target address, flags, branch stack). */
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

/* Synthesize one instruction sample (superset of mem + branch fields). */
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id,
					     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

/* CPUs using the common (Neoverse/Cortex) data-source encoding */
static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	{},
};

/* CPUs using the AmpereOne IMPDEF data-source encoding */
static const struct midr_range ampereone_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
	{},
};

/* CPUs using the HiSilicon HIP IMPDEF data-source encoding */
static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
	{},
};

/* Derive the perf IP flags (branch kind, miss, txn, etc.) for the record. */
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;

	speq->flags = 0;
	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
		speq->flags = PERF_IP_FLAG_BRANCH;

		if (record->type & ARM_SPE_BRANCH_MISS)
			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			speq->flags |= PERF_IP_FLAG_NOT_TAKEN;

		if (record->type & ARM_SPE_IN_TXN)
			speq->flags |= PERF_IP_FLAG_IN_TX;

		if (record->op & ARM_SPE_OP_BR_COND)
			speq->flags |= PERF_IP_FLAG_CONDITIONAL;

		if (record->op & ARM_SPE_OP_BR_CR_BL)
			speq->flags |= PERF_IP_FLAG_CALL;
		else if (record->op & ARM_SPE_OP_BR_CR_RET)
			speq->flags |= PERF_IP_FLAG_RETURN;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * take it as a function return.
		 */
		else if (record->op & ARM_SPE_OP_BR_INDIRECT)
			speq->flags |= PERF_IP_FLAG_RETURN;
	}
}

/* Translate the common (Neoverse/Cortex) data-source code into perf_mem_data_src. */
static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three levels
	 * so for the time being we assume three exist. If a production system
	 * is built with four the this function would have to be changed to
	 * detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
	 * transfer, so set SNOOPX_PEER
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/*
 * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
 * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
 */
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
						 union perf_mem_data_src *data_src)
{
	struct arm_spe_record common_record;

	switch (record->source) {
	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
		break;
	case ARM_SPE_AMPEREONE_SLC:
		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
		break;
	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
		break;
	case ARM_SPE_AMPEREONE_DDR:
		common_record.source = ARM_SPE_COMMON_DS_DRAM;
		break;
	case ARM_SPE_AMPEREONE_L1D:
		common_record.source = ARM_SPE_COMMON_DS_L1D;
		break;
	case ARM_SPE_AMPEREONE_L2D:
		common_record.source = ARM_SPE_COMMON_DS_L2;
		break;
	default:
		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
				record->source);
		return;
	}

	common_record.op = record->op;
	arm_spe__synth_data_source_common(&common_record, data_src);
}

/* Translate the HiSilicon HIP IMPDEF data-source code into perf_mem_data_src. */
static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
						union perf_mem_data_src *data_src)
{
	/* Use common synthesis method to handle store operations */
	if (record->op & ARM_SPE_OP_ST) {
		arm_spe__synth_data_source_common(record, data_src);
		return;
	}

	switch (record->source) {
	case ARM_SPE_HISI_HIP_PEER_CPU:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_L3:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	case ARM_SPE_HISI_HIP_L3_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_LOCAL_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		break;
	case ARM_SPE_HISI_HIP_NC_DEV:
		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_L1:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/* Vendor dispatch table: first matching MIDR range wins. */
static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};

/* Infer the memory level for a load from the per-level access/miss events. */
static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/*
	 * To find a cache hit, search in ascending order from the lower level
	 * caches to the higher level caches. This reflects the best scenario
	 * for a cache hit.
	 */
	if (arm_spe_is_cache_hit(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	} else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
	} else if (arm_spe_is_cache_hit(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_hit(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	/*
	 * To find a cache miss, search in descending order from the higher
	 * level cache to the lower level cache. This represents the worst
	 * scenario for a cache miss.
	 */
	} else if (arm_spe_is_cache_miss(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_miss(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_miss(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

/* Infer the memory level for a store from the per-level access/miss events. */
static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/* Record the greatest level info for a store operation. */
	if (arm_spe_is_cache_level(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_level(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_level(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

/*
 * Fill memory level / snoop / remote fields from event bits, but only where
 * the (more precise) data-source packet has not already set them.
 */
static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
					const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;

	/*
	 * The data source packet contains more info for cache levels for
	 * peer snooping. So respect the memory level if has been set by
	 * data source parsing.
	 */
	if (!data_src->mem_lvl) {
		if (data_src->mem_op == PERF_MEM_OP_LOAD)
			arm_spe__synth_ld_memory_level(record, data_src);
		if (data_src->mem_op == PERF_MEM_OP_STORE)
			arm_spe__synth_st_memory_level(record, data_src);
	}

	if (!data_src->mem_lvl) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
	}

	/*
	 * If 'mem_snoop' has been set by data source packet, skip to set
	 * it at here.
	 */
	if (!data_src->mem_snoop) {
		if (record->type & ARM_SPE_DATA_SNOOPED) {
			if (record->type & ARM_SPE_HITM)
				data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		} else {
			u64 *metadata =
				arm_spe__get_metadata_by_cpu(spe, speq->cpu);

			/*
			 * Set NA ("Not available") mode if no meta data or the
			 * SNOOPED event is not supported.
			 */
			if (!metadata ||
			    !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
				data_src->mem_snoop = PERF_MEM_SNOOP_NA;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		}
	}

	if (!data_src->mem_remote) {
		if (record->type & ARM_SPE_REMOTE_ACCESS)
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
	}
}

/*
 * Pick the vendor data-source decoder by MIDR and run it; no-op when the
 * CPU has no known encoding or metadata is unavailable.
 */
static void arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		/*
		 * NOTE(review): perf_env__cpuid() can plausibly return NULL;
		 * strtol(NULL, ...) would be undefined behavior — verify the
		 * callers guarantee a non-NULL cpuid here.
		 */
		cpuid = perf_env__cpuid(perf_session__env(spe->session));
		midr = strtol(cpuid, NULL, 16);
	} else {
		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		if (!metadata)
			return;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
			return data_source_handles[i].ds_synth(record, data_src);
		}
	}

	return;
}

/* Build the full perf_mem_data_src for a record (op, level, snoop, TLB). */
static union perf_mem_data_src
arm_spe__synth_data_source(struct arm_spe_queue *speq,
			   const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = {};

	/* Only synthesize data source for LDST operations */
	if (!is_ldst_op(record->op))
		return data_src;

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return data_src;

	arm_spe__synth_ds(speq, record, &data_src);
	arm_spe__synth_memory_level(speq, record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src;
}

/* Synthesize every requested sample class for the current decoded record. */
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	union perf_mem_data_src data_src;
	int err;

	/*
	 * Discard all samples until period is reached
	 */
	speq->sample_count++;
	if (speq->sample_count < spe->synth_opts.period)
		return 0;
	speq->sample_count = 0;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->synth_opts.last_branch &&
	    (spe->sample_branch || spe->sample_instructions))
		arm_spe__prep_branch_stack(speq);

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero it means the record is not a memory operation,
	 * skip to synthesize memory sample for this case.
	 */
	if (spe->sample_memory && is_ldst_op(record->op)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is firstly to decode the packets, and then
		 * based the record to synthesize sample; but here the flow is
		 * reversed: it calls arm_spe_sample() for synthesizing samples
		 * prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
		 * has decoded trace data and generated a record, but the record
		 * is left to generate sample until run to here, so it's correct
		 * to synthesize sample for the left record.
		 * 2. After decoding trace data, it needs to compare the record
		 * timestamp with the coming perf event, if the record timestamp
		 * is later than the perf event, it needs bail out and pushs the
		 * record into auxtrace heap, thus the record can be deferred to
		 * synthesize sample until run to here at the next time; so this
		 * can correlate samples between Arm SPE trace data and other
		 * perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * Error is detected when decode SPE trace data, continue to
		 * the next trace data and find out more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than timestamp of the
		 * coming perf event, bail out so can allow the perf event to
		 * be processed ahead.
1189 */ 1190 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { 1191 *timestamp = speq->timestamp; 1192 return 0; 1193 } 1194 } 1195 1196 return 0; 1197 } 1198 1199 static int arm_spe__setup_queue(struct arm_spe *spe, 1200 struct auxtrace_queue *queue, 1201 unsigned int queue_nr) 1202 { 1203 struct arm_spe_queue *speq = queue->priv; 1204 struct arm_spe_record *record; 1205 1206 if (list_empty(&queue->head) || speq) 1207 return 0; 1208 1209 speq = arm_spe__alloc_queue(spe, queue_nr); 1210 1211 if (!speq) 1212 return -ENOMEM; 1213 1214 queue->priv = speq; 1215 1216 if (queue->cpu != -1) 1217 speq->cpu = queue->cpu; 1218 1219 if (!speq->on_heap) { 1220 int ret; 1221 1222 if (spe->timeless_decoding) 1223 return 0; 1224 1225 retry: 1226 ret = arm_spe_decode(speq->decoder); 1227 1228 if (!ret) 1229 return 0; 1230 1231 if (ret < 0) 1232 goto retry; 1233 1234 record = &speq->decoder->record; 1235 1236 speq->timestamp = record->timestamp; 1237 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); 1238 if (ret) 1239 return ret; 1240 speq->on_heap = true; 1241 } 1242 1243 return 0; 1244 } 1245 1246 static int arm_spe__setup_queues(struct arm_spe *spe) 1247 { 1248 unsigned int i; 1249 int ret; 1250 1251 for (i = 0; i < spe->queues.nr_queues; i++) { 1252 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); 1253 if (ret) 1254 return ret; 1255 } 1256 1257 return 0; 1258 } 1259 1260 static int arm_spe__update_queues(struct arm_spe *spe) 1261 { 1262 if (spe->queues.new_data) { 1263 spe->queues.new_data = false; 1264 return arm_spe__setup_queues(spe); 1265 } 1266 1267 return 0; 1268 } 1269 1270 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) 1271 { 1272 struct evsel *evsel; 1273 struct evlist *evlist = spe->session->evlist; 1274 bool timeless_decoding = true; 1275 1276 /* 1277 * Circle through the list of event and complain if we find one 1278 * with the time bit set. 
1279 */ 1280 evlist__for_each_entry(evlist, evsel) { 1281 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 1282 timeless_decoding = false; 1283 } 1284 1285 return timeless_decoding; 1286 } 1287 1288 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 1289 { 1290 unsigned int queue_nr; 1291 u64 ts; 1292 int ret; 1293 1294 while (1) { 1295 struct auxtrace_queue *queue; 1296 struct arm_spe_queue *speq; 1297 1298 if (!spe->heap.heap_cnt) 1299 return 0; 1300 1301 if (spe->heap.heap_array[0].ordinal >= timestamp) 1302 return 0; 1303 1304 queue_nr = spe->heap.heap_array[0].queue_nr; 1305 queue = &spe->queues.queue_array[queue_nr]; 1306 speq = queue->priv; 1307 1308 auxtrace_heap__pop(&spe->heap); 1309 1310 if (spe->heap.heap_cnt) { 1311 ts = spe->heap.heap_array[0].ordinal + 1; 1312 if (ts > timestamp) 1313 ts = timestamp; 1314 } else { 1315 ts = timestamp; 1316 } 1317 1318 /* 1319 * A previous context-switch event has set pid/tid in the machine's context, so 1320 * here we need to update the pid/tid in the thread and SPE queue. 
1321 */ 1322 if (!spe->use_ctx_pkt_for_pid) 1323 arm_spe_set_pid_tid_cpu(spe, queue); 1324 1325 ret = arm_spe_run_decoder(speq, &ts); 1326 if (ret < 0) { 1327 auxtrace_heap__add(&spe->heap, queue_nr, ts); 1328 return ret; 1329 } 1330 1331 if (!ret) { 1332 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 1333 if (ret < 0) 1334 return ret; 1335 } else { 1336 speq->on_heap = false; 1337 } 1338 } 1339 1340 return 0; 1341 } 1342 1343 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 1344 u64 time_) 1345 { 1346 struct auxtrace_queues *queues = &spe->queues; 1347 unsigned int i; 1348 u64 ts = 0; 1349 1350 for (i = 0; i < queues->nr_queues; i++) { 1351 struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 1352 struct arm_spe_queue *speq = queue->priv; 1353 1354 if (speq && (tid == -1 || speq->tid == tid)) { 1355 speq->time = time_; 1356 arm_spe_set_pid_tid_cpu(spe, queue); 1357 arm_spe_run_decoder(speq, &ts); 1358 } 1359 } 1360 return 0; 1361 } 1362 1363 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, 1364 struct perf_sample *sample) 1365 { 1366 pid_t pid, tid; 1367 int cpu; 1368 1369 if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) 1370 return 0; 1371 1372 pid = event->context_switch.next_prev_pid; 1373 tid = event->context_switch.next_prev_tid; 1374 cpu = sample->cpu; 1375 1376 if (tid == -1) 1377 pr_warning("context_switch event has no tid\n"); 1378 1379 return machine__set_current_tid(spe->machine, cpu, pid, tid); 1380 } 1381 1382 static int arm_spe_process_event(struct perf_session *session, 1383 union perf_event *event, 1384 struct perf_sample *sample, 1385 const struct perf_tool *tool) 1386 { 1387 int err = 0; 1388 u64 timestamp; 1389 struct arm_spe *spe = container_of(session->auxtrace, 1390 struct arm_spe, auxtrace); 1391 1392 if (dump_trace) 1393 return 0; 1394 1395 if (!tool->ordered_events) { 1396 pr_err("SPE trace requires ordered events\n"); 1397 return -EINVAL; 1398 } 1399 1400 if 
(sample->time && (sample->time != (u64) -1)) 1401 timestamp = perf_time_to_tsc(sample->time, &spe->tc); 1402 else 1403 timestamp = 0; 1404 1405 if (timestamp || spe->timeless_decoding) { 1406 err = arm_spe__update_queues(spe); 1407 if (err) 1408 return err; 1409 } 1410 1411 if (spe->timeless_decoding) { 1412 if (event->header.type == PERF_RECORD_EXIT) { 1413 err = arm_spe_process_timeless_queues(spe, 1414 event->fork.tid, 1415 sample->time); 1416 } 1417 } else if (timestamp) { 1418 err = arm_spe_process_queues(spe, timestamp); 1419 if (err) 1420 return err; 1421 1422 if (!spe->use_ctx_pkt_for_pid && 1423 (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || 1424 event->header.type == PERF_RECORD_SWITCH)) 1425 err = arm_spe_context_switch(spe, event, sample); 1426 } 1427 1428 return err; 1429 } 1430 1431 static int arm_spe_process_auxtrace_event(struct perf_session *session, 1432 union perf_event *event, 1433 const struct perf_tool *tool __maybe_unused) 1434 { 1435 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1436 auxtrace); 1437 1438 if (!spe->data_queued) { 1439 struct auxtrace_buffer *buffer; 1440 off_t data_offset; 1441 int fd = perf_data__fd(session->data); 1442 int err; 1443 1444 if (perf_data__is_pipe(session->data)) { 1445 data_offset = 0; 1446 } else { 1447 data_offset = lseek(fd, 0, SEEK_CUR); 1448 if (data_offset == -1) 1449 return -errno; 1450 } 1451 1452 err = auxtrace_queues__add_event(&spe->queues, session, event, 1453 data_offset, &buffer); 1454 if (err) 1455 return err; 1456 1457 /* Dump here now we have copied a piped trace out of the pipe */ 1458 if (dump_trace) { 1459 if (auxtrace_buffer__get_data(buffer, fd)) { 1460 arm_spe_dump_event(spe, buffer->data, 1461 buffer->size); 1462 auxtrace_buffer__put_data(buffer); 1463 } 1464 } 1465 } 1466 1467 return 0; 1468 } 1469 1470 static int arm_spe_flush(struct perf_session *session __maybe_unused, 1471 const struct perf_tool *tool __maybe_unused) 1472 { 1473 struct arm_spe *spe = 
container_of(session->auxtrace, struct arm_spe, 1474 auxtrace); 1475 int ret; 1476 1477 if (dump_trace) 1478 return 0; 1479 1480 if (!tool->ordered_events) 1481 return -EINVAL; 1482 1483 ret = arm_spe__update_queues(spe); 1484 if (ret < 0) 1485 return ret; 1486 1487 if (spe->timeless_decoding) 1488 return arm_spe_process_timeless_queues(spe, -1, 1489 MAX_TIMESTAMP - 1); 1490 1491 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 1492 if (ret) 1493 return ret; 1494 1495 if (!spe->use_ctx_pkt_for_pid) 1496 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 1497 "Matching of TIDs to SPE events could be inaccurate.\n"); 1498 1499 return 0; 1500 } 1501 1502 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) 1503 { 1504 u64 *metadata; 1505 1506 metadata = zalloc(per_cpu_size); 1507 if (!metadata) 1508 return NULL; 1509 1510 memcpy(metadata, buf, per_cpu_size); 1511 return metadata; 1512 } 1513 1514 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) 1515 { 1516 int i; 1517 1518 for (i = 0; i < nr_cpu; i++) 1519 zfree(&metadata[i]); 1520 free(metadata); 1521 } 1522 1523 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, 1524 u64 *ver, int *nr_cpu) 1525 { 1526 u64 *ptr = (u64 *)info->priv; 1527 u64 metadata_size; 1528 u64 **metadata = NULL; 1529 int hdr_sz, per_cpu_sz, i; 1530 1531 metadata_size = info->header.size - 1532 sizeof(struct perf_record_auxtrace_info); 1533 1534 /* Metadata version 1 */ 1535 if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { 1536 *ver = 1; 1537 *nr_cpu = 0; 1538 /* No per CPU metadata */ 1539 return NULL; 1540 } 1541 1542 *ver = ptr[ARM_SPE_HEADER_VERSION]; 1543 hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; 1544 *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; 1545 1546 metadata = calloc(*nr_cpu, sizeof(*metadata)); 1547 if (!metadata) 1548 return NULL; 1549 1550 /* Locate the start address of per CPU metadata */ 1551 ptr += hdr_sz; 1552 per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / 
(*nr_cpu); 1553 1554 for (i = 0; i < *nr_cpu; i++) { 1555 metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); 1556 if (!metadata[i]) 1557 goto err_per_cpu_metadata; 1558 1559 ptr += per_cpu_sz / sizeof(u64); 1560 } 1561 1562 return metadata; 1563 1564 err_per_cpu_metadata: 1565 arm_spe__free_metadata(metadata, *nr_cpu); 1566 return NULL; 1567 } 1568 1569 static void arm_spe_free_queue(void *priv) 1570 { 1571 struct arm_spe_queue *speq = priv; 1572 1573 if (!speq) 1574 return; 1575 thread__zput(speq->thread); 1576 arm_spe_decoder_free(speq->decoder); 1577 zfree(&speq->event_buf); 1578 zfree(&speq->last_branch); 1579 free(speq); 1580 } 1581 1582 static void arm_spe_free_events(struct perf_session *session) 1583 { 1584 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1585 auxtrace); 1586 struct auxtrace_queues *queues = &spe->queues; 1587 unsigned int i; 1588 1589 for (i = 0; i < queues->nr_queues; i++) { 1590 arm_spe_free_queue(queues->queue_array[i].priv); 1591 queues->queue_array[i].priv = NULL; 1592 } 1593 auxtrace_queues__free(queues); 1594 } 1595 1596 static void arm_spe_free(struct perf_session *session) 1597 { 1598 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 1599 auxtrace); 1600 1601 auxtrace_heap__free(&spe->heap); 1602 arm_spe_free_events(session); 1603 session->auxtrace = NULL; 1604 arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); 1605 free(spe); 1606 } 1607 1608 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, 1609 struct evsel *evsel) 1610 { 1611 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); 1612 1613 return evsel->core.attr.type == spe->pmu_type; 1614 } 1615 1616 static const char * const metadata_hdr_v1_fmts[] = { 1617 [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1618 [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", 1619 }; 1620 1621 static const char * const metadata_hdr_fmts[] = { 1622 [ARM_SPE_HEADER_VERSION] = " 
Header version :%"PRId64"\n", 1623 [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", 1624 [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", 1625 [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", 1626 }; 1627 1628 static const char * const metadata_per_cpu_fmts[] = { 1629 [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", 1630 [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", 1631 [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", 1632 [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", 1633 [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", 1634 [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", 1635 [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n", 1636 }; 1637 1638 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) 1639 { 1640 unsigned int i, cpu, hdr_size, cpu_num, cpu_size; 1641 const char * const *hdr_fmts; 1642 1643 if (!dump_trace) 1644 return; 1645 1646 if (spe->metadata_ver == 1) { 1647 cpu_num = 0; 1648 hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; 1649 hdr_fmts = metadata_hdr_v1_fmts; 1650 } else { 1651 cpu_num = arr[ARM_SPE_CPUS_NUM]; 1652 hdr_size = arr[ARM_SPE_HEADER_SIZE]; 1653 hdr_fmts = metadata_hdr_fmts; 1654 } 1655 1656 for (i = 0; i < hdr_size; i++) 1657 fprintf(stdout, hdr_fmts[i], arr[i]); 1658 1659 arr += hdr_size; 1660 for (cpu = 0; cpu < cpu_num; cpu++) { 1661 /* 1662 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS 1663 * are fixed. The sequential parameter size is decided by the 1664 * field 'ARM_SPE_CPU_NR_PARAMS'. 
1665 */ 1666 cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; 1667 for (i = 0; i < cpu_size; i++) 1668 fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); 1669 arr += cpu_size; 1670 } 1671 } 1672 1673 static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 1674 const char *name) 1675 { 1676 struct evsel *evsel; 1677 1678 evlist__for_each_entry(evlist, evsel) { 1679 if (evsel->core.id && evsel->core.id[0] == id) { 1680 if (evsel->name) 1681 zfree(&evsel->name); 1682 evsel->name = strdup(name); 1683 break; 1684 } 1685 } 1686 } 1687 1688 static int 1689 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) 1690 { 1691 struct evlist *evlist = session->evlist; 1692 struct evsel *evsel; 1693 struct perf_event_attr attr; 1694 bool found = false; 1695 u64 id; 1696 int err; 1697 1698 evlist__for_each_entry(evlist, evsel) { 1699 if (evsel->core.attr.type == spe->pmu_type) { 1700 found = true; 1701 break; 1702 } 1703 } 1704 1705 if (!found) { 1706 pr_debug("No selected events with SPE trace data\n"); 1707 return 0; 1708 } 1709 1710 memset(&attr, 0, sizeof(struct perf_event_attr)); 1711 attr.size = sizeof(struct perf_event_attr); 1712 attr.type = PERF_TYPE_HARDWARE; 1713 attr.sample_type = evsel->core.attr.sample_type & 1714 (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); 1715 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1716 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | 1717 PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; 1718 if (spe->timeless_decoding) 1719 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1720 else 1721 attr.sample_type |= PERF_SAMPLE_TIME; 1722 1723 spe->sample_type = attr.sample_type; 1724 1725 attr.exclude_user = evsel->core.attr.exclude_user; 1726 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1727 attr.exclude_hv = evsel->core.attr.exclude_hv; 1728 attr.exclude_host = evsel->core.attr.exclude_host; 1729 attr.exclude_guest = evsel->core.attr.exclude_guest; 1730 attr.sample_id_all = 
evsel->core.attr.sample_id_all; 1731 attr.read_format = evsel->core.attr.read_format; 1732 attr.sample_period = spe->synth_opts.period; 1733 1734 /* create new id val to be a fixed offset from evsel id */ 1735 id = evsel->core.id[0] + 1000000000; 1736 1737 if (!id) 1738 id = 1; 1739 1740 if (spe->synth_opts.flc) { 1741 spe->sample_flc = true; 1742 1743 /* Level 1 data cache miss */ 1744 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1745 if (err) 1746 return err; 1747 spe->l1d_miss_id = id; 1748 arm_spe_set_event_name(evlist, id, "l1d-miss"); 1749 id += 1; 1750 1751 /* Level 1 data cache access */ 1752 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1753 if (err) 1754 return err; 1755 spe->l1d_access_id = id; 1756 arm_spe_set_event_name(evlist, id, "l1d-access"); 1757 id += 1; 1758 } 1759 1760 if (spe->synth_opts.llc) { 1761 spe->sample_llc = true; 1762 1763 /* Last level cache miss */ 1764 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1765 if (err) 1766 return err; 1767 spe->llc_miss_id = id; 1768 arm_spe_set_event_name(evlist, id, "llc-miss"); 1769 id += 1; 1770 1771 /* Last level cache access */ 1772 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1773 if (err) 1774 return err; 1775 spe->llc_access_id = id; 1776 arm_spe_set_event_name(evlist, id, "llc-access"); 1777 id += 1; 1778 } 1779 1780 if (spe->synth_opts.tlb) { 1781 spe->sample_tlb = true; 1782 1783 /* TLB miss */ 1784 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1785 if (err) 1786 return err; 1787 spe->tlb_miss_id = id; 1788 arm_spe_set_event_name(evlist, id, "tlb-miss"); 1789 id += 1; 1790 1791 /* TLB access */ 1792 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1793 if (err) 1794 return err; 1795 spe->tlb_access_id = id; 1796 arm_spe_set_event_name(evlist, id, "tlb-access"); 1797 id += 1; 1798 } 1799 1800 if (spe->synth_opts.last_branch) { 1801 if (spe->synth_opts.last_branch_sz > 2) 1802 
pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n"); 1803 1804 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1805 /* 1806 * We don't use the hardware index, but the sample generation 1807 * code uses the new format branch_stack with this field, 1808 * so the event attributes must indicate that it's present. 1809 */ 1810 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1811 } 1812 1813 if (spe->synth_opts.branches) { 1814 spe->sample_branch = true; 1815 1816 /* Branch */ 1817 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1818 if (err) 1819 return err; 1820 spe->branch_id = id; 1821 arm_spe_set_event_name(evlist, id, "branch"); 1822 id += 1; 1823 } 1824 1825 if (spe->synth_opts.remote_access) { 1826 spe->sample_remote_access = true; 1827 1828 /* Remote access */ 1829 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1830 if (err) 1831 return err; 1832 spe->remote_access_id = id; 1833 arm_spe_set_event_name(evlist, id, "remote-access"); 1834 id += 1; 1835 } 1836 1837 if (spe->synth_opts.mem) { 1838 spe->sample_memory = true; 1839 1840 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1841 if (err) 1842 return err; 1843 spe->memory_id = id; 1844 arm_spe_set_event_name(evlist, id, "memory"); 1845 id += 1; 1846 } 1847 1848 if (spe->synth_opts.instructions) { 1849 spe->sample_instructions = true; 1850 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1851 1852 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1853 if (err) 1854 return err; 1855 spe->instructions_id = id; 1856 arm_spe_set_event_name(evlist, id, "instructions"); 1857 } 1858 1859 return 0; 1860 } 1861 1862 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) 1863 { 1864 u64 midr; 1865 int i; 1866 1867 if (!nr_cpu) 1868 return false; 1869 1870 for (i = 0; i < nr_cpu; i++) { 1871 if (!metadata[i]) 1872 return false; 1873 1874 if (i == 0) { 1875 midr = metadata[i][ARM_SPE_CPU_MIDR]; 1876 continue; 1877 } 1878 1879 if 
(midr != metadata[i][ARM_SPE_CPU_MIDR]) 1880 return false; 1881 } 1882 1883 return true; 1884 } 1885 1886 int arm_spe_process_auxtrace_info(union perf_event *event, 1887 struct perf_session *session) 1888 { 1889 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1890 size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; 1891 struct perf_record_time_conv *tc = &session->time_conv; 1892 struct arm_spe *spe; 1893 u64 **metadata = NULL; 1894 u64 metadata_ver; 1895 int nr_cpu, err; 1896 1897 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 1898 min_sz) 1899 return -EINVAL; 1900 1901 metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, 1902 &nr_cpu); 1903 if (!metadata && metadata_ver != 1) { 1904 pr_err("Failed to parse Arm SPE metadata.\n"); 1905 return -EINVAL; 1906 } 1907 1908 spe = zalloc(sizeof(struct arm_spe)); 1909 if (!spe) { 1910 err = -ENOMEM; 1911 goto err_free_metadata; 1912 } 1913 1914 err = auxtrace_queues__init(&spe->queues); 1915 if (err) 1916 goto err_free; 1917 1918 spe->session = session; 1919 spe->machine = &session->machines.host; /* No kvm support */ 1920 spe->auxtrace_type = auxtrace_info->type; 1921 if (metadata_ver == 1) 1922 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; 1923 else 1924 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; 1925 spe->metadata = metadata; 1926 spe->metadata_ver = metadata_ver; 1927 spe->metadata_nr_cpu = nr_cpu; 1928 spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); 1929 1930 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); 1931 1932 /* 1933 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead 1934 * and the parameters for hardware clock are stored in the session 1935 * context. Passes these parameters to the struct perf_tsc_conversion 1936 * in "spe->tc", which is used for later conversion between clock 1937 * counter and timestamp. 
1938 * 1939 * For backward compatibility, copies the fields starting from 1940 * "time_cycles" only if they are contained in the event. 1941 */ 1942 spe->tc.time_shift = tc->time_shift; 1943 spe->tc.time_mult = tc->time_mult; 1944 spe->tc.time_zero = tc->time_zero; 1945 1946 if (event_contains(*tc, time_cycles)) { 1947 spe->tc.time_cycles = tc->time_cycles; 1948 spe->tc.time_mask = tc->time_mask; 1949 spe->tc.cap_user_time_zero = tc->cap_user_time_zero; 1950 spe->tc.cap_user_time_short = tc->cap_user_time_short; 1951 } 1952 1953 spe->auxtrace.process_event = arm_spe_process_event; 1954 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; 1955 spe->auxtrace.flush_events = arm_spe_flush; 1956 spe->auxtrace.free_events = arm_spe_free_events; 1957 spe->auxtrace.free = arm_spe_free; 1958 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; 1959 session->auxtrace = &spe->auxtrace; 1960 1961 arm_spe_print_info(spe, &auxtrace_info->priv[0]); 1962 1963 if (dump_trace) 1964 return 0; 1965 1966 if (session->itrace_synth_opts && session->itrace_synth_opts->set) { 1967 spe->synth_opts = *session->itrace_synth_opts; 1968 } else { 1969 itrace_synth_opts__set_default(&spe->synth_opts, false); 1970 /* Default nanoseconds period not supported */ 1971 spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; 1972 spe->synth_opts.period = 1; 1973 } 1974 1975 if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { 1976 ui__error("You must only use i (instructions) --itrace period with Arm SPE. 
e.g --itrace=i1i\n"); 1977 err = -EINVAL; 1978 goto err_free_queues; 1979 } 1980 if (spe->synth_opts.period > 1) 1981 ui__warning("Arm SPE has a hardware-based sampling period.\n\n" 1982 "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n"); 1983 1984 err = arm_spe_synth_events(spe, session); 1985 if (err) 1986 goto err_free_queues; 1987 1988 err = auxtrace_queues__process_index(&spe->queues, session); 1989 if (err) 1990 goto err_free_queues; 1991 1992 if (spe->queues.populated) 1993 spe->data_queued = true; 1994 1995 return 0; 1996 1997 err_free_queues: 1998 auxtrace_queues__free(&spe->queues); 1999 session->auxtrace = NULL; 2000 err_free: 2001 free(spe); 2002 err_free_metadata: 2003 arm_spe__free_metadata(metadata, nr_cpu); 2004 return err; 2005 } 2006