// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)

#define is_ldst_op(op)		(!!((op) & ARM_SPE_OP_LDST))

#define is_simd_op(op)		(!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \
					    ARM_SPE_OP_SME | ARM_SPE_OP_ASE)))

#define is_mem_op(op)		(is_ldst_op(op) || is_simd_op(op))

#define ARM_SPE_CACHE_EVENT(lvl) \
	(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)

#define arm_spe_is_cache_level(type, lvl) \
	((type) & ARM_SPE_CACHE_EVENT(lvl))

#define arm_spe_is_cache_hit(type, lvl) \
	(((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)

#define arm_spe_is_cache_miss(type, lvl) \
	((type) & ARM_SPE_##lvl##_MISS)
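
/*
 * Illustration (not part of the decoder): for the L1 data cache,
 * arm_spe_is_cache_hit(type, L1D) expands to
 *
 *	((type) & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) == ARM_SPE_L1D_ACCESS
 *
 * i.e. a record counts as a hit only when the access bit is set and the
 * miss bit is clear, while arm_spe_is_cache_miss() checks the miss bit
 * alone.
 */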

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;

	u64				**metadata;
	u64				metadata_ver;
	u64				metadata_nr_cpu;
	bool				is_homogeneous;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				sample_count;
	u32				flags;
	struct branch_stack		*last_branch;
};

struct data_source_handle {
	const struct midr_range *midr_ranges;
	void (*ds_synth)(const struct arm_spe_record *record,
			 union perf_mem_data_src *data_src);
};

#define DS(range, func)					\
	{						\
		.midr_ranges = range,			\
		.ds_synth = arm_spe__synth_##func,	\
	}

static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	if (spe->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		/* Allocate up to two entries for PBT + TGT */
		sz += sizeof(struct branch_entry) *
		      min(spe->synth_opts.last_branch_sz, 2U);
		speq->last_branch = zalloc(sz);
		if (!speq->last_branch)
			goto out_free;
	}

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}
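
/*
 * Note on arm_spe_cpumode() above: samples are classified purely by
 * address, anything at or above machine__kernel_start() is reported as
 * kernel space and everything else as user space; no privilege
 * information from the trace itself is consulted here.
 */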

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	/* CPU ID is -1 for per-thread mode */
	if (cpu < 0) {
		/*
		 * On a heterogeneous system, because the CPU ID is -1, we
		 * cannot confirm whether the data source packet is supported.
		 */
		if (!spe->is_homogeneous)
			return NULL;

		/* In a homogeneous system, simply use CPU0's metadata */
		return spe->metadata[0];
	}

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
			return spe->metadata[i];

	return NULL;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if (record->op & ARM_SPE_OP_SVE)
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = spe->synth_opts.period;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
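
/*
 * Background for the helper below: an SPE record can describe at most two
 * branches, the sampled branch itself (from_ip/to_ip, "TGT") and the
 * previous branch target ("PBT") when the record carries one. The
 * synthesized branch stack therefore never holds more than two entries,
 * regardless of the requested last_branch_sz.
 */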

static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	struct branch_stack *bstack = speq->last_branch;
	struct branch_flags *bs_flags;
	unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
	bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
	bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
	size_t sz = sizeof(struct branch_stack) +
		    sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
	int i = 0;

	/* Clean up branch stack */
	memset(bstack, 0x0, sz);

	if (!have_tgt && !have_pbt)
		return;

	if (have_tgt) {
		bstack->entries[i].from = record->from_ip;
		bstack->entries[i].to = record->to_ip;

		bs_flags = &bstack->entries[i].flags;
		bs_flags->value = 0;

		if (record->op & ARM_SPE_OP_BR_CR_BL) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_CALL;
			else
				bs_flags->type |= PERF_BR_CALL;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * treat this case as a function return.
		 */
		} else if (record->op & ARM_SPE_OP_BR_CR_RET ||
			   record->op & ARM_SPE_OP_BR_INDIRECT) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND_RET;
			else
				bs_flags->type |= PERF_BR_RET;
		} else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNCOND;
		} else {
			if (record->op & ARM_SPE_OP_BR_COND)
				bs_flags->type |= PERF_BR_COND;
			else
				bs_flags->type |= PERF_BR_UNKNOWN;
		}

		if (record->type & ARM_SPE_BRANCH_MISS) {
			bs_flags->mispred = 1;
			bs_flags->predicted = 0;
		} else {
			bs_flags->mispred = 0;
			bs_flags->predicted = 1;
		}

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			bs_flags->not_taken = 1;

		if (record->type & ARM_SPE_IN_TXN)
			bs_flags->in_tx = 1;

		bs_flags->cycles = min(record->latency, 0xFFFFU);
		i++;
	}

	if (have_pbt) {
		bs_flags = &bstack->entries[i].flags;
		bs_flags->type |= PERF_BR_UNKNOWN;
		bstack->entries[i].to = record->prev_br_tgt;
		i++;
	}

	bstack->nr = i;
	bstack->hw_idx = -1ULL;
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}
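
/*
 * The three arm_spe__synth_*_sample() helpers below follow one pattern:
 * populate the queue's scratch event buffer via arm_spe_prep_sample(),
 * fill in the fields specific to the synthesized event type, and hand the
 * result to arm_spe_deliver_synth_event(). The sample id/stream_id select
 * which synthetic event (l1d-miss, branch, memory, ...) the sample is
 * attributed to.
 */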

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id,
				     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id,
					     union perf_mem_data_src data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src.val;
	sample.weight = record->latency;
	sample.flags = speq->flags;
	sample.branch_stack = speq->last_branch;

	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
	perf_sample__exit(&sample);
	return ret;
}

static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
	{},
};

static const struct midr_range ampereone_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
	{},
};

static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
	{},
};
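
/*
 * The MIDR range tables above feed the data_source_handles[] dispatch
 * table further down. As a sketch, supporting a new core with its own
 * IMPDEF data source encoding would mean adding a range list and a
 * handler (names below are hypothetical):
 *
 *	static const struct midr_range my_cpu_ds_encoding_cpus[] = {
 *		MIDR_ALL_VERSIONS(MIDR_MY_CPU),
 *		{},
 *	};
 *
 * plus an arm_spe__synth_data_source_my_cpu() handler registered with
 * DS(my_cpu_ds_encoding_cpus, data_source_my_cpu).
 */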

static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;

	speq->flags = 0;
	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
		speq->flags = PERF_IP_FLAG_BRANCH;

		if (record->type & ARM_SPE_BRANCH_MISS)
			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;

		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
			speq->flags |= PERF_IP_FLAG_NOT_TAKEN;

		if (record->type & ARM_SPE_IN_TXN)
			speq->flags |= PERF_IP_FLAG_IN_TX;

		if (record->op & ARM_SPE_OP_BR_COND)
			speq->flags |= PERF_IP_FLAG_CONDITIONAL;

		if (record->op & ARM_SPE_OP_BR_CR_BL)
			speq->flags |= PERF_IP_FLAG_CALL;
		else if (record->op & ARM_SPE_OP_BR_CR_RET)
			speq->flags |= PERF_IP_FLAG_RETURN;
		/*
		 * Indirect branch instruction without link (e.g. BR),
		 * treat it as a function return.
		 */
		else if (record->op & ARM_SPE_OP_BR_INDIRECT)
			speq->flags |= PERF_IP_FLAG_RETURN;
	}
}

static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three
	 * levels, so for the time being we assume three exist. If a
	 * production system is built with four, then this function would
	 * have to be changed to detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1 or L2, but we do know it was a
	 * cache-to-cache transfer, so set SNOOPX_PEER
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}
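
/*
 * Worked example for the decoder above: a load whose data source packet
 * reports ARM_SPE_COMMON_DS_PEER_CORE is published as an L2 hit
 * (PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT, level number L2) with
 * PERF_MEM_SNOOPX_PEER set, so tools like perf mem/c2c can attribute it
 * to a cache-to-cache transfer from another core.
 */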

/*
 * Source is IMPDEF. Here we convert the source code used on AmpereOne
 * cores to the common encoding (Neoverse, Cortex) to avoid duplicating
 * the decoding code.
 */
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
						 union perf_mem_data_src *data_src)
{
	struct arm_spe_record common_record;

	switch (record->source) {
	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
		break;
	case ARM_SPE_AMPEREONE_SLC:
		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
		break;
	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
		break;
	case ARM_SPE_AMPEREONE_DDR:
		common_record.source = ARM_SPE_COMMON_DS_DRAM;
		break;
	case ARM_SPE_AMPEREONE_L1D:
		common_record.source = ARM_SPE_COMMON_DS_L1D;
		break;
	case ARM_SPE_AMPEREONE_L2D:
		common_record.source = ARM_SPE_COMMON_DS_L2;
		break;
	default:
		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
				record->source);
		return;
	}

	common_record.op = record->op;
	arm_spe__synth_data_source_common(&common_record, data_src);
}
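
/*
 * Unlike AmpereOne, the HiSilicon HIP encoding is decoded directly rather
 * than translated to the common encoding: its source values distinguish
 * clean snoop hits from hits on modified lines (the *_HITM variants),
 * which the common encoding cannot express.
 */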

static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
						union perf_mem_data_src *data_src)
{
	/* Use common synthesis method to handle store operations */
	if (record->op & ARM_SPE_OP_ST) {
		arm_spe__synth_data_source_common(record, data_src);
		return;
	}

	switch (record->source) {
	case ARM_SPE_HISI_HIP_PEER_CPU:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_L3:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	case ARM_SPE_HISI_HIP_L3_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_LOCAL_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		break;
	case ARM_SPE_HISI_HIP_NC_DEV:
		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_L1:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
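
/*
 * The helpers below are the fallback path: when no data source packet was
 * decoded (data_src->mem_lvl is still zero), the memory hierarchy level
 * is reconstructed from the ACCESS/MISS event bits in the record type
 * instead.
 */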

static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/*
	 * To find a cache hit, search in ascending order from the lower level
	 * caches to the higher level caches. This reflects the best scenario
	 * for a cache hit.
	 */
	if (arm_spe_is_cache_hit(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	} else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
	} else if (arm_spe_is_cache_hit(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_hit(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	/*
	 * To find a cache miss, search in descending order from the higher
	 * level cache to the lower level cache. This represents the worst
	 * scenario for a cache miss.
	 */
	} else if (arm_spe_is_cache_miss(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_miss(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_miss(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/* Record the highest level info for a store operation. */
	if (arm_spe_is_cache_level(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_level(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_level(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
					const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;

	/*
	 * The data source packet contains more info for cache levels for
	 * peer snooping. So respect the memory level if it has been set by
	 * data source parsing.
	 */
	if (!data_src->mem_lvl) {
		if (data_src->mem_op == PERF_MEM_OP_LOAD)
			arm_spe__synth_ld_memory_level(record, data_src);
		if (data_src->mem_op == PERF_MEM_OP_STORE)
			arm_spe__synth_st_memory_level(record, data_src);
	}

	if (!data_src->mem_lvl) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
	}

	/*
	 * If 'mem_snoop' has been set by the data source packet, skip
	 * setting it here.
	 */
	if (!data_src->mem_snoop) {
		if (record->type & ARM_SPE_DATA_SNOOPED) {
			if (record->type & ARM_SPE_HITM)
				data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		} else {
			u64 *metadata =
				arm_spe__get_metadata_by_cpu(spe, speq->cpu);

			/*
			 * Set NA ("Not available") mode if there is no
			 * metadata or the SNOOPED event is not supported.
			 */
			if (!metadata ||
			    !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
				data_src->mem_snoop = PERF_MEM_SNOOP_NA;
			else
				data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		}
	}

	if (!data_src->mem_remote) {
		if (record->type & ARM_SPE_REMOTE_ACCESS)
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
	}
}

static void arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(perf_session__env(spe->session));
		midr = strtol(cpuid, NULL, 16);
	} else {
		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		if (!metadata)
			return;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges))
			return data_source_handles[i].ds_synth(record, data_src);
	}
}
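
/*
 * Top-level data_src synthesis: classify the operation (load/store), run
 * the MIDR-matched IMPDEF data source decoder if one exists, fall back to
 * the event-type-based memory level, then add the TLB walk/hit/miss bits.
 */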

static union perf_mem_data_src
arm_spe__synth_data_source(struct arm_spe_queue *speq,
			   const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = {};

	if (!is_mem_op(record->op))
		return data_src;

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		data_src.mem_op = PERF_MEM_OP_NA;

	arm_spe__synth_ds(speq, record, &data_src);
	arm_spe__synth_memory_level(speq, record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	union perf_mem_data_src data_src;
	int err;

	/*
	 * Discard all samples until period is reached
	 */
	speq->sample_count++;
	if (speq->sample_count < spe->synth_opts.period)
		return 0;
	speq->sample_count = 0;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->synth_opts.last_branch &&
	    (spe->sample_branch || spe->sample_instructions))
		arm_spe__prep_branch_stack(speq);

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && is_mem_op(record->op)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
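
/*
 * Note on the period check in arm_spe_sample() above: synth_opts.period
 * counts decoded SPE records, not instructions, so e.g. --itrace=i10i
 * keeps one record out of every ten per queue (see the warning emitted
 * at setup time further down).
 */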

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is first to decode the packets, and then
		 * synthesize a sample based on the record; but here the flow
		 * is reversed: arm_spe_sample() is called to synthesize a
		 * sample prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. When the queue is set up in arm_spe__setup_queue(),
		 * trace data has already been decoded and a record generated,
		 * but synthesizing a sample for that record is deferred until
		 * the flow reaches here, so it is correct to synthesize a
		 * sample for the leftover record.
		 * 2. After decoding trace data, the record timestamp needs to
		 * be compared with that of the coming perf event; if the
		 * record timestamp is later, bail out and push the record
		 * into the auxtrace heap, so that synthesizing its sample is
		 * deferred until the flow reaches here the next time. This
		 * correlates samples between Arm SPE trace data and other
		 * perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected when decoding the SPE trace data;
		 * continue to the next trace data and find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can
		 * be processed ahead.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}
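
/*
 * "Timeless" decoding means no event in the session recorded
 * PERF_SAMPLE_TIME, so records cannot be correlated by timestamp; queues
 * are then processed per thread at exit/flush time instead of through the
 * timestamp-ordered auxtrace heap.
 */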

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and stop treating the decode as
	 * timeless if we find one with the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}
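
/*
 * pid/tid attribution works one of two ways: if the trace contains
 * CONTEXT packets (use_ctx_pkt_for_pid), the tid comes straight from the
 * decoded records; otherwise it is reconstructed from PERF_RECORD_SWITCH*
 * sideband events via arm_spe_context_switch() above.
 */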

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
							      event->fork.tid,
							      sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here, now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
{
	u64 *metadata;

	metadata = zalloc(per_cpu_size);
	if (!metadata)
		return NULL;

	memcpy(metadata, buf, per_cpu_size);
	return metadata;
}

static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
{
	int i;

	for (i = 0; i < nr_cpu; i++)
		zfree(&metadata[i]);
	free(metadata);
}
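
/*
 * Sketch of the version 2 metadata layout parsed below (each cell is a
 * u64; the per-CPU block repeats ARM_SPE_CPUS_NUM times):
 *
 *	header:  [ ARM_SPE_HEADER_VERSION | ARM_SPE_HEADER_SIZE |
 *		   ARM_SPE_PMU_TYPE_V2    | ARM_SPE_CPUS_NUM ]
 *	per CPU: [ ARM_SPE_MAGIC | ARM_SPE_CPU | ARM_SPE_CPU_NR_PARAMS |
 *		   <ARM_SPE_CPU_NR_PARAMS more parameters> ]
 *
 * Version 1 has no per-CPU blocks at all and is detected purely by its
 * fixed private data size.
 */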

static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
				     u64 *ver, int *nr_cpu)
{
	u64 *ptr = (u64 *)info->priv;
	u64 metadata_size;
	u64 **metadata = NULL;
	int hdr_sz, per_cpu_sz, i;

	metadata_size = info->header.size -
		sizeof(struct perf_record_auxtrace_info);

	/* Metadata version 1 */
	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
		*ver = 1;
		*nr_cpu = 0;
		/* No per CPU metadata */
		return NULL;
	}

	*ver = ptr[ARM_SPE_HEADER_VERSION];
	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];

	metadata = calloc(*nr_cpu, sizeof(*metadata));
	if (!metadata)
		return NULL;

	/* Locate the start address of per CPU metadata */
	ptr += hdr_sz;
	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type         :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps    :%"PRId64"\n",
};

static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version   :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size      :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2      :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number       :%"PRId64"\n",
};

static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic          :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #          :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params  :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR           :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type       :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval   :%"PRId64"\n",
	[ARM_SPE_CAP_EVENT_FILTER]	= "    Event Filter   :0x%"PRIx64"\n",
};
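
/*
 * Example for the cpu_size computation below (assuming the enum indices
 * follow the table order above): a per-CPU block reporting
 * ARM_SPE_CPU_NR_PARAMS = 4 consists of the three fixed fields (magic,
 * CPU #, number of params) followed by four parameters (MIDR, PMU type,
 * min interval, event filter), i.e. seven u64 values in total.
 */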

static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The number of subsequent parameters is given by
		 * the field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
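
/*
 * Each enabled --itrace sample class below gets its own synthetic event:
 * ids are allocated as consecutive values starting from
 * auxtrace_synth_id_range_start(evsel), a synthetic attr event is
 * delivered for each, and the matching evsel is renamed (l1d-miss,
 * branch, memory, ...) so the classes are distinguishable in reports.
 */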

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;
	attr.sample_period = spe->synth_opts.period;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.last_branch) {
		if (spe->synth_opts.last_branch_sz > 2)
			pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");

		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_id = id;
		arm_spe_set_event_name(evlist, id, "branch");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}

	return 0;
}

static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
	u64 midr;
	int i;

	if (!nr_cpu)
		return false;

	for (i = 0; i < nr_cpu; i++) {
		if (!metadata[i])
			return false;

		if (i == 0) {
			midr = metadata[i][ARM_SPE_CPU_MIDR];
			continue;
		}

		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
			return false;
	}

	return true;
}
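
/*
 * Entry point, called when the session's PERF_RECORD_AUXTRACE_INFO event
 * is processed; it validates the metadata and installs the decode
 * callbacks. As a usage sketch (the PMU name is platform-dependent,
 * arm_spe_0 is typical):
 *
 *	perf record -e arm_spe_0// -- <workload>
 *	perf report --itrace=i1i
 *
 * decodes the recorded AUX data through the callbacks installed below.
 */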

int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	u64 **metadata = NULL;
	u64 metadata_ver;
	int nr_cpu, err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					 min_sz)
		return -EINVAL;

	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
					   &nr_cpu);
	if (!metadata && metadata_ver != 1) {
		pr_err("Failed to parse Arm SPE metadata.\n");
		return -EINVAL;
	}

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has been handled
	 * beforehand and the parameters for the hardware clock are stored
	 * in the session context. Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used for later
	 * conversion between clock counter and timestamp.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		spe->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&spe->synth_opts, false);
		/* Default nanoseconds period not supported */
		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
		spe->synth_opts.period = 1;
	}

	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
		ui__error("You must only use i (instructions) --itrace period with Arm SPE, e.g. --itrace=i1i\n");
		err = -EINVAL;
		goto err_free_queues;
	}
	if (spe->synth_opts.period > 1)
		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}