1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Arm Statistical Profiling Extensions (SPE) support 4 * Copyright (c) 2017-2018, Arm Ltd. 5 */ 6 7 #include <byteswap.h> 8 #include <endian.h> 9 #include <errno.h> 10 #include <inttypes.h> 11 #include <linux/bitops.h> 12 #include <linux/kernel.h> 13 #include <linux/log2.h> 14 #include <linux/types.h> 15 #include <linux/zalloc.h> 16 #include <stdlib.h> 17 #include <unistd.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "debug.h" 22 #include "evlist.h" 23 #include "evsel.h" 24 #include "machine.h" 25 #include "session.h" 26 #include "symbol.h" 27 #include "thread.h" 28 #include "thread-stack.h" 29 #include "tsc.h" 30 #include "tool.h" 31 #include "util/synthetic-events.h" 32 33 #include "arm-spe.h" 34 #include "arm-spe-decoder/arm-spe-decoder.h" 35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 36 37 #include "../../arch/arm64/include/asm/cputype.h" 38 #define MAX_TIMESTAMP (~0ULL) 39 40 #define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) 41 42 #define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \ 43 ARM_SPE_OP_SME | ARM_SPE_OP_ASE))) 44 45 #define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op)) 46 47 #define ARM_SPE_CACHE_EVENT(lvl) \ 48 (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS) 49 50 #define arm_spe_is_cache_level(type, lvl) \ 51 ((type) & ARM_SPE_CACHE_EVENT(lvl)) 52 53 #define arm_spe_is_cache_hit(type, lvl) \ 54 (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS) 55 56 #define arm_spe_is_cache_miss(type, lvl) \ 57 ((type) & ARM_SPE_##lvl##_MISS) 58 59 struct arm_spe { 60 struct auxtrace auxtrace; 61 struct auxtrace_queues queues; 62 struct auxtrace_heap heap; 63 struct itrace_synth_opts synth_opts; 64 u32 auxtrace_type; 65 struct perf_session *session; 66 struct machine *machine; 67 u32 pmu_type; 68 69 struct perf_tsc_conversion tc; 70 71 u8 timeless_decoding; 72 u8 data_queued; 73 74 u64 sample_type; 75 u8 sample_flc; 76 u8 sample_llc; 77 u8 sample_tlb; 78 
u8 sample_branch; 79 u8 sample_remote_access; 80 u8 sample_memory; 81 u8 sample_instructions; 82 83 u64 l1d_miss_id; 84 u64 l1d_access_id; 85 u64 llc_miss_id; 86 u64 llc_access_id; 87 u64 tlb_miss_id; 88 u64 tlb_access_id; 89 u64 branch_id; 90 u64 remote_access_id; 91 u64 memory_id; 92 u64 instructions_id; 93 94 u64 kernel_start; 95 96 unsigned long num_events; 97 u8 use_ctx_pkt_for_pid; 98 99 u64 **metadata; 100 u64 metadata_ver; 101 u64 metadata_nr_cpu; 102 bool is_homogeneous; 103 }; 104 105 struct arm_spe_queue { 106 struct arm_spe *spe; 107 unsigned int queue_nr; 108 struct auxtrace_buffer *buffer; 109 struct auxtrace_buffer *old_buffer; 110 union perf_event *event_buf; 111 bool on_heap; 112 bool done; 113 pid_t pid; 114 pid_t tid; 115 int cpu; 116 struct arm_spe_decoder *decoder; 117 u64 time; 118 u64 timestamp; 119 struct thread *thread; 120 u64 sample_count; 121 u32 flags; 122 struct branch_stack *last_branch; 123 }; 124 125 struct data_source_handle { 126 const struct midr_range *midr_ranges; 127 void (*ds_synth)(const struct arm_spe_record *record, 128 union perf_mem_data_src *data_src); 129 }; 130 131 #define DS(range, func) \ 132 { \ 133 .midr_ranges = range, \ 134 .ds_synth = arm_spe__synth_##func, \ 135 } 136 137 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, 138 unsigned char *buf, size_t len) 139 { 140 struct arm_spe_pkt packet; 141 size_t pos = 0; 142 int ret, pkt_len, i; 143 char desc[ARM_SPE_PKT_DESC_MAX]; 144 const char *color = PERF_COLOR_BLUE; 145 146 color_fprintf(stdout, color, 147 ". ... 
ARM SPE data: size %#zx bytes\n", 148 len); 149 150 while (len) { 151 ret = arm_spe_get_packet(buf, len, &packet); 152 if (ret > 0) 153 pkt_len = ret; 154 else 155 pkt_len = 1; 156 printf("."); 157 color_fprintf(stdout, color, " %08zx: ", pos); 158 for (i = 0; i < pkt_len; i++) 159 color_fprintf(stdout, color, " %02x", buf[i]); 160 for (; i < 16; i++) 161 color_fprintf(stdout, color, " "); 162 if (ret > 0) { 163 ret = arm_spe_pkt_desc(&packet, desc, 164 ARM_SPE_PKT_DESC_MAX); 165 if (!ret) 166 color_fprintf(stdout, color, " %s\n", desc); 167 } else { 168 color_fprintf(stdout, color, " Bad packet!\n"); 169 } 170 pos += pkt_len; 171 buf += pkt_len; 172 len -= pkt_len; 173 } 174 } 175 176 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, 177 size_t len) 178 { 179 printf(".\n"); 180 arm_spe_dump(spe, buf, len); 181 } 182 183 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 184 { 185 struct arm_spe_queue *speq = data; 186 struct auxtrace_buffer *buffer = speq->buffer; 187 struct auxtrace_buffer *old_buffer = speq->old_buffer; 188 struct auxtrace_queue *queue; 189 190 queue = &speq->spe->queues.queue_array[speq->queue_nr]; 191 192 buffer = auxtrace_buffer__next(queue, buffer); 193 /* If no more data, drop the previous auxtrace_buffer and return */ 194 if (!buffer) { 195 if (old_buffer) 196 auxtrace_buffer__drop_data(old_buffer); 197 b->len = 0; 198 return 0; 199 } 200 201 speq->buffer = buffer; 202 203 /* If the aux_buffer doesn't have data associated, try to load it */ 204 if (!buffer->data) { 205 /* get the file desc associated with the perf data file */ 206 int fd = perf_data__fd(speq->spe->session->data); 207 208 buffer->data = auxtrace_buffer__get_data(buffer, fd); 209 if (!buffer->data) 210 return -ENOMEM; 211 } 212 213 b->len = buffer->size; 214 b->buf = buffer->data; 215 216 if (b->len) { 217 if (old_buffer) 218 auxtrace_buffer__drop_data(old_buffer); 219 speq->old_buffer = buffer; 220 } else { 221 
		/* Empty buffer: release it and recurse to fetch the next one */
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

/*
 * Allocate and initialise a per-queue decode context: a scratch event
 * buffer for synthesized samples, an optional last-branch stack and a
 * new SPE decoder wired up to arm_spe_get_trace().
 *
 * Returns the new queue on success, NULL on allocation/decoder failure.
 */
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	/* pid/tid/cpu are unknown until context packets or queue setup fill them in */
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	if (spe->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		/* Allocate up to two entries for PBT + TGT */
		sz += sizeof(struct branch_entry) *
			min(spe->synth_opts.last_branch_sz, 2U);
		speq->last_branch = zalloc(sz);
		if (!speq->last_branch)
			goto out_free;
	}

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);

	return NULL;
}

/* Classify @ip as kernel or user space relative to the kernel start address */
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

/*
 * Refresh the queue's pid/tid/cpu from the machine's current-tid state.
 * Falls back to the auxtrace queue's tid when the machine has no tid
 * recorded for this CPU, then resolves the owning thread (and from it
 * the pid, and the cpu in per-thread mode) where possible.
 */
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		/* tid changed: drop the cached thread so it is re-resolved */
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		/* queue->cpu == -1 means per-thread mode: take the cpu from the thread */
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

/*
 * Record @tid as current for this queue's CPU in the machine state, then
 * refresh the queue's pid/tid/cpu accordingly.  Returns 0 on success or
 * the error from machine__set_current_tid().
 */
static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

/*
 * Look up the per-CPU metadata array for @cpu.  Returns NULL when no
 * metadata exists, or when @cpu is -1 (per-thread mode) on a
 * heterogeneous system where no single CPU's metadata can stand in.
 */
static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	/* CPU ID is -1 for per-thread mode */
	if (cpu < 0) {
		/*
		 * On the heterogeneous system, due to CPU ID is -1,
		 * cannot confirm the data source packet is supported.
335 */ 336 if (!spe->is_homogeneous) 337 return NULL; 338 339 /* In homogeneous system, simply use CPU0's metadata */ 340 return spe->metadata[0]; 341 } 342 343 for (i = 0; i < spe->metadata_nr_cpu; i++) 344 if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu) 345 return spe->metadata[i]; 346 347 return NULL; 348 } 349 350 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) 351 { 352 struct simd_flags simd_flags = {}; 353 354 if (record->op & ARM_SPE_OP_SVE) 355 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; 356 else if (record->op & ARM_SPE_OP_SME) 357 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SME; 358 else if (record->op & (ARM_SPE_OP_ASE | ARM_SPE_OP_SIMD_FP)) 359 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_ASE; 360 361 if (record->op & ARM_SPE_OP_SVE) { 362 if (!(record->op & ARM_SPE_OP_PRED)) 363 simd_flags.pred = SIMD_OP_FLAGS_PRED_DISABLED; 364 else if (record->type & ARM_SPE_SVE_PARTIAL_PRED) 365 simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL; 366 else if (record->type & ARM_SPE_SVE_EMPTY_PRED) 367 simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY; 368 else 369 simd_flags.pred = SIMD_OP_FLAGS_PRED_FULL; 370 } else { 371 if (record->type & ARM_SPE_SVE_PARTIAL_PRED) 372 simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL; 373 else if (record->type & ARM_SPE_SVE_EMPTY_PRED) 374 simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY; 375 } 376 377 return simd_flags; 378 } 379 380 static void arm_spe_prep_sample(struct arm_spe *spe, 381 struct arm_spe_queue *speq, 382 union perf_event *event, 383 struct perf_sample *sample) 384 { 385 struct arm_spe_record *record = &speq->decoder->record; 386 387 if (!spe->timeless_decoding) 388 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); 389 390 sample->ip = record->from_ip; 391 sample->cpumode = arm_spe_cpumode(spe, sample->ip); 392 sample->pid = speq->pid; 393 sample->tid = speq->tid; 394 sample->period = spe->synth_opts.period; 395 sample->cpu = speq->cpu; 396 sample->simd_flags = arm_spe__synth_simd_flags(record); 397 
398 event->sample.header.type = PERF_RECORD_SAMPLE; 399 event->sample.header.misc = sample->cpumode; 400 event->sample.header.size = sizeof(struct perf_event_header); 401 } 402 403 static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq) 404 { 405 struct arm_spe *spe = speq->spe; 406 struct arm_spe_record *record = &speq->decoder->record; 407 struct branch_stack *bstack = speq->last_branch; 408 struct branch_flags *bs_flags; 409 unsigned int last_branch_sz = spe->synth_opts.last_branch_sz; 410 bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH); 411 bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt; 412 size_t sz = sizeof(struct branch_stack) + 413 sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */; 414 int i = 0; 415 416 /* Clean up branch stack */ 417 memset(bstack, 0x0, sz); 418 419 if (!have_tgt && !have_pbt) 420 return; 421 422 if (have_tgt) { 423 bstack->entries[i].from = record->from_ip; 424 bstack->entries[i].to = record->to_ip; 425 426 bs_flags = &bstack->entries[i].flags; 427 bs_flags->value = 0; 428 429 if (record->op & ARM_SPE_OP_BR_CR_BL) { 430 if (record->op & ARM_SPE_OP_BR_COND) 431 bs_flags->type |= PERF_BR_COND_CALL; 432 else 433 bs_flags->type |= PERF_BR_CALL; 434 /* 435 * Indirect branch instruction without link (e.g. BR), 436 * take this case as function return. 
437 */ 438 } else if (record->op & ARM_SPE_OP_BR_CR_RET || 439 record->op & ARM_SPE_OP_BR_INDIRECT) { 440 if (record->op & ARM_SPE_OP_BR_COND) 441 bs_flags->type |= PERF_BR_COND_RET; 442 else 443 bs_flags->type |= PERF_BR_RET; 444 } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) { 445 if (record->op & ARM_SPE_OP_BR_COND) 446 bs_flags->type |= PERF_BR_COND; 447 else 448 bs_flags->type |= PERF_BR_UNCOND; 449 } else { 450 if (record->op & ARM_SPE_OP_BR_COND) 451 bs_flags->type |= PERF_BR_COND; 452 else 453 bs_flags->type |= PERF_BR_UNKNOWN; 454 } 455 456 if (record->type & ARM_SPE_BRANCH_MISS) { 457 bs_flags->mispred = 1; 458 bs_flags->predicted = 0; 459 } else { 460 bs_flags->mispred = 0; 461 bs_flags->predicted = 1; 462 } 463 464 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) 465 bs_flags->not_taken = 1; 466 467 if (record->type & ARM_SPE_IN_TXN) 468 bs_flags->in_tx = 1; 469 470 bs_flags->cycles = min(record->latency, 0xFFFFU); 471 i++; 472 } 473 474 if (have_pbt) { 475 bs_flags = &bstack->entries[i].flags; 476 bs_flags->type |= PERF_BR_UNKNOWN; 477 bstack->entries[i].to = record->prev_br_tgt; 478 i++; 479 } 480 481 bstack->nr = i; 482 bstack->hw_idx = -1ULL; 483 } 484 485 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) 486 { 487 event->header.size = perf_event__sample_event_size(sample, type, 0); 488 return perf_event__synthesize_sample(event, type, 0, sample); 489 } 490 491 static inline int 492 arm_spe_deliver_synth_event(struct arm_spe *spe, 493 struct arm_spe_queue *speq __maybe_unused, 494 union perf_event *event, 495 struct perf_sample *sample) 496 { 497 int ret; 498 499 if (spe->synth_opts.inject) { 500 ret = arm_spe__inject_event(event, sample, spe->sample_type); 501 if (ret) 502 return ret; 503 } 504 505 ret = perf_session__deliver_synth_event(spe->session, event, sample); 506 if (ret) 507 pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 508 509 return ret; 510 } 511 512 static int 
arm_spe__synth_mem_sample(struct arm_spe_queue *speq, 513 u64 spe_events_id, 514 union perf_mem_data_src data_src) 515 { 516 struct arm_spe *spe = speq->spe; 517 struct arm_spe_record *record = &speq->decoder->record; 518 union perf_event *event = speq->event_buf; 519 struct perf_sample sample; 520 int ret; 521 522 perf_sample__init(&sample, /*all=*/true); 523 arm_spe_prep_sample(spe, speq, event, &sample); 524 525 sample.id = spe_events_id; 526 sample.stream_id = spe_events_id; 527 sample.addr = record->virt_addr; 528 sample.phys_addr = record->phys_addr; 529 sample.data_src = data_src.val; 530 sample.weight = record->latency; 531 532 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 533 perf_sample__exit(&sample); 534 return ret; 535 } 536 537 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, 538 u64 spe_events_id) 539 { 540 struct arm_spe *spe = speq->spe; 541 struct arm_spe_record *record = &speq->decoder->record; 542 union perf_event *event = speq->event_buf; 543 struct perf_sample sample; 544 int ret; 545 546 perf_sample__init(&sample, /*all=*/true); 547 arm_spe_prep_sample(spe, speq, event, &sample); 548 549 sample.id = spe_events_id; 550 sample.stream_id = spe_events_id; 551 sample.addr = record->to_ip; 552 sample.weight = record->latency; 553 sample.flags = speq->flags; 554 sample.branch_stack = speq->last_branch; 555 556 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 557 perf_sample__exit(&sample); 558 return ret; 559 } 560 561 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, 562 u64 spe_events_id, 563 union perf_mem_data_src data_src) 564 { 565 struct arm_spe *spe = speq->spe; 566 struct arm_spe_record *record = &speq->decoder->record; 567 union perf_event *event = speq->event_buf; 568 struct perf_sample sample; 569 int ret; 570 571 perf_sample__init(&sample, /*all=*/true); 572 arm_spe_prep_sample(spe, speq, event, &sample); 573 574 sample.id = spe_events_id; 575 sample.stream_id = 
spe_events_id; 576 sample.addr = record->to_ip; 577 sample.phys_addr = record->phys_addr; 578 sample.data_src = data_src.val; 579 sample.weight = record->latency; 580 sample.flags = speq->flags; 581 sample.branch_stack = speq->last_branch; 582 583 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); 584 perf_sample__exit(&sample); 585 return ret; 586 } 587 588 static const struct midr_range common_ds_encoding_cpus[] = { 589 MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), 590 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), 591 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), 592 MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), 593 MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), 594 MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), 595 MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), 596 MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), 597 MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), 598 MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), 599 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), 600 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), 601 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), 602 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), 603 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), 604 MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS), 605 {}, 606 }; 607 608 static const struct midr_range ampereone_ds_encoding_cpus[] = { 609 MIDR_ALL_VERSIONS(MIDR_AMPERE1A), 610 {}, 611 }; 612 613 static const struct midr_range hisi_hip_ds_encoding_cpus[] = { 614 MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), 615 {}, 616 }; 617 618 static void arm_spe__sample_flags(struct arm_spe_queue *speq) 619 { 620 const struct arm_spe_record *record = &speq->decoder->record; 621 622 speq->flags = 0; 623 if (record->op & ARM_SPE_OP_BRANCH_ERET) { 624 speq->flags = PERF_IP_FLAG_BRANCH; 625 626 if (record->type & ARM_SPE_BRANCH_MISS) 627 speq->flags |= PERF_IP_FLAG_BRANCH_MISS; 628 629 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) 630 speq->flags |= PERF_IP_FLAG_NOT_TAKEN; 631 632 if (record->type & ARM_SPE_IN_TXN) 633 speq->flags |= PERF_IP_FLAG_IN_TX; 634 635 if (record->op & ARM_SPE_OP_BR_COND) 636 speq->flags |= PERF_IP_FLAG_CONDITIONAL; 637 638 if (record->op & 
ARM_SPE_OP_BR_CR_BL) 639 speq->flags |= PERF_IP_FLAG_CALL; 640 else if (record->op & ARM_SPE_OP_BR_CR_RET) 641 speq->flags |= PERF_IP_FLAG_RETURN; 642 /* 643 * Indirect branch instruction without link (e.g. BR), 644 * take it as a function return. 645 */ 646 else if (record->op & ARM_SPE_OP_BR_INDIRECT) 647 speq->flags |= PERF_IP_FLAG_RETURN; 648 } 649 } 650 651 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, 652 union perf_mem_data_src *data_src) 653 { 654 /* 655 * Even though four levels of cache hierarchy are possible, no known 656 * production Neoverse systems currently include more than three levels 657 * so for the time being we assume three exist. If a production system 658 * is built with four the this function would have to be changed to 659 * detect the number of levels for reporting. 660 */ 661 662 /* 663 * We have no data on the hit level or data source for stores in the 664 * Neoverse SPE records. 665 */ 666 if (record->op & ARM_SPE_OP_ST) { 667 data_src->mem_lvl = PERF_MEM_LVL_NA; 668 data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; 669 data_src->mem_snoop = PERF_MEM_SNOOP_NA; 670 return; 671 } 672 673 switch (record->source) { 674 case ARM_SPE_COMMON_DS_L1D: 675 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; 676 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; 677 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 678 break; 679 case ARM_SPE_COMMON_DS_L2: 680 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 681 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 682 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 683 break; 684 case ARM_SPE_COMMON_DS_PEER_CORE: 685 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 686 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 687 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 688 break; 689 /* 690 * We don't know if this is L1, L2 but we do know it was a cache-2-cache 691 * transfer, so set SNOOPX_PEER 692 */ 693 case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: 694 case ARM_SPE_COMMON_DS_PEER_CLUSTER: 
695 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 696 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 697 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 698 break; 699 /* 700 * System cache is assumed to be L3 701 */ 702 case ARM_SPE_COMMON_DS_SYS_CACHE: 703 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 704 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 705 data_src->mem_snoop = PERF_MEM_SNOOP_HIT; 706 break; 707 /* 708 * We don't know what level it hit in, except it came from the other 709 * socket 710 */ 711 case ARM_SPE_COMMON_DS_REMOTE: 712 data_src->mem_lvl = PERF_MEM_LVL_NA; 713 data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; 714 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; 715 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 716 break; 717 case ARM_SPE_COMMON_DS_DRAM: 718 data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; 719 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; 720 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 721 break; 722 default: 723 break; 724 } 725 } 726 727 /* 728 * Source is IMPDEF. Here we convert the source code used on AmpereOne cores 729 * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. 
730 */ 731 static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, 732 union perf_mem_data_src *data_src) 733 { 734 struct arm_spe_record common_record; 735 736 switch (record->source) { 737 case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: 738 common_record.source = ARM_SPE_COMMON_DS_PEER_CORE; 739 break; 740 case ARM_SPE_AMPEREONE_SLC: 741 common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE; 742 break; 743 case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: 744 common_record.source = ARM_SPE_COMMON_DS_REMOTE; 745 break; 746 case ARM_SPE_AMPEREONE_DDR: 747 common_record.source = ARM_SPE_COMMON_DS_DRAM; 748 break; 749 case ARM_SPE_AMPEREONE_L1D: 750 common_record.source = ARM_SPE_COMMON_DS_L1D; 751 break; 752 case ARM_SPE_AMPEREONE_L2D: 753 common_record.source = ARM_SPE_COMMON_DS_L2; 754 break; 755 default: 756 pr_warning_once("AmpereOne: Unknown data source (0x%x)\n", 757 record->source); 758 return; 759 } 760 761 common_record.op = record->op; 762 arm_spe__synth_data_source_common(&common_record, data_src); 763 } 764 765 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, 766 union perf_mem_data_src *data_src) 767 { 768 /* Use common synthesis method to handle store operations */ 769 if (record->op & ARM_SPE_OP_ST) { 770 arm_spe__synth_data_source_common(record, data_src); 771 return; 772 } 773 774 switch (record->source) { 775 case ARM_SPE_HISI_HIP_PEER_CPU: 776 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 777 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 778 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 779 break; 780 case ARM_SPE_HISI_HIP_PEER_CPU_HITM: 781 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; 782 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; 783 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 784 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; 785 break; 786 case ARM_SPE_HISI_HIP_L3: 787 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; 788 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; 
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	case ARM_SPE_HISI_HIP_L3_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	/* Peer cluster: remote cache-to-cache transfer within the socket */
	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/* Remote socket: cache level unknown, so report ANY_CACHE + remote */
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_HISI_HIP_LOCAL_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_REMOTE_MEM:
		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		break;
	/* Non-cacheable device access */
	case ARM_SPE_HISI_HIP_NC_DEV:
		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 |
PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_HISI_HIP_L2_HITM:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
		break;
	case ARM_SPE_HISI_HIP_L1:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/*
 * Per-implementation data-source decoders; an entry is selected by
 * matching the CPU's MIDR against its midr_ranges list.
 */
static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};

/*
 * Derive the memory hierarchy level for a load from the record's
 * cache access/miss event bits when no data source packet set it.
 */
static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/*
	 * To find a cache hit, search in ascending order from the lower level
	 * caches to the higher level caches. This reflects the best scenario
	 * for a cache hit.
	 */
	if (arm_spe_is_cache_hit(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	} else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
		/* Recently fetched: satisfied from the line fill buffer */
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
	} else if (arm_spe_is_cache_hit(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_hit(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	/*
	 * To find a cache miss, search in descending order from the higher
	 * level cache to the lower level cache. This represents the worst
	 * scenario for a cache miss.
	 */
	} else if (arm_spe_is_cache_miss(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_miss(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_miss(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

/*
 * Derive the memory hierarchy level for a store.  Unlike loads, only
 * the highest cache level the store touched is recorded.
 */
static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
					   union perf_mem_data_src *data_src)
{
	/* Record the greatest level info for a store operation. */
	if (arm_spe_is_cache_level(record->type, LLC)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
	} else if (arm_spe_is_cache_level(record->type, L2D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L2;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
	} else if (arm_spe_is_cache_level(record->type, L1D)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;
		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
	}
}

/*
 * Fill in level / snoop / remote fields from the record's event bits,
 * without overriding anything already set by data source parsing.
 */
static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
					const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;

	/*
	 * The data source packet contains more info for cache levels for
	 * peer snooping. So respect the memory level if has been set by
	 * data source parsing.
930 */ 931 if (!data_src->mem_lvl) { 932 if (data_src->mem_op == PERF_MEM_OP_LOAD) 933 arm_spe__synth_ld_memory_level(record, data_src); 934 if (data_src->mem_op == PERF_MEM_OP_STORE) 935 arm_spe__synth_st_memory_level(record, data_src); 936 } 937 938 if (!data_src->mem_lvl) { 939 data_src->mem_lvl = PERF_MEM_LVL_NA; 940 data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; 941 } 942 943 /* 944 * If 'mem_snoop' has been set by data source packet, skip to set 945 * it at here. 946 */ 947 if (!data_src->mem_snoop) { 948 if (record->type & ARM_SPE_DATA_SNOOPED) { 949 if (record->type & ARM_SPE_HITM) 950 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 951 else 952 data_src->mem_snoop = PERF_MEM_SNOOP_HIT; 953 } else { 954 u64 *metadata = 955 arm_spe__get_metadata_by_cpu(spe, speq->cpu); 956 957 /* 958 * Set NA ("Not available") mode if no meta data or the 959 * SNOOPED event is not supported. 960 */ 961 if (!metadata || 962 !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED)) 963 data_src->mem_snoop = PERF_MEM_SNOOP_NA; 964 else 965 data_src->mem_snoop = PERF_MEM_SNOOP_NONE; 966 } 967 } 968 969 if (!data_src->mem_remote) { 970 if (record->type & ARM_SPE_REMOTE_ACCESS) 971 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; 972 } 973 } 974 975 static void arm_spe__synth_ds(struct arm_spe_queue *speq, 976 const struct arm_spe_record *record, 977 union perf_mem_data_src *data_src) 978 { 979 struct arm_spe *spe = speq->spe; 980 u64 *metadata = NULL; 981 u64 midr; 982 unsigned int i; 983 984 /* Metadata version 1 assumes all CPUs are the same (old behavior) */ 985 if (spe->metadata_ver == 1) { 986 const char *cpuid; 987 988 pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); 989 cpuid = perf_env__cpuid(perf_session__env(spe->session)); 990 midr = strtol(cpuid, NULL, 16); 991 } else { 992 metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); 993 if (!metadata) 994 return; 995 996 midr = metadata[ARM_SPE_CPU_MIDR]; 997 } 998 999 for (i = 0; i < 
ARRAY_SIZE(data_source_handles); i++) { 1000 if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) { 1001 return data_source_handles[i].ds_synth(record, data_src); 1002 } 1003 } 1004 1005 return; 1006 } 1007 1008 static union perf_mem_data_src 1009 arm_spe__synth_data_source(struct arm_spe_queue *speq, 1010 const struct arm_spe_record *record) 1011 { 1012 union perf_mem_data_src data_src = {}; 1013 1014 if (!is_mem_op(record->op)) 1015 return data_src; 1016 1017 if (record->op & ARM_SPE_OP_LD) 1018 data_src.mem_op = PERF_MEM_OP_LOAD; 1019 else if (record->op & ARM_SPE_OP_ST) 1020 data_src.mem_op = PERF_MEM_OP_STORE; 1021 else 1022 data_src.mem_op = PERF_MEM_OP_NA; 1023 1024 arm_spe__synth_ds(speq, record, &data_src); 1025 arm_spe__synth_memory_level(speq, record, &data_src); 1026 1027 if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { 1028 data_src.mem_dtlb = PERF_MEM_TLB_WK; 1029 1030 if (record->type & ARM_SPE_TLB_MISS) 1031 data_src.mem_dtlb |= PERF_MEM_TLB_MISS; 1032 else 1033 data_src.mem_dtlb |= PERF_MEM_TLB_HIT; 1034 } 1035 1036 return data_src; 1037 } 1038 1039 static int arm_spe_sample(struct arm_spe_queue *speq) 1040 { 1041 const struct arm_spe_record *record = &speq->decoder->record; 1042 struct arm_spe *spe = speq->spe; 1043 union perf_mem_data_src data_src; 1044 int err; 1045 1046 /* 1047 * Discard all samples until period is reached 1048 */ 1049 speq->sample_count++; 1050 if (speq->sample_count < spe->synth_opts.period) 1051 return 0; 1052 speq->sample_count = 0; 1053 1054 arm_spe__sample_flags(speq); 1055 data_src = arm_spe__synth_data_source(speq, record); 1056 1057 if (spe->sample_flc) { 1058 if (record->type & ARM_SPE_L1D_MISS) { 1059 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, 1060 data_src); 1061 if (err) 1062 return err; 1063 } 1064 1065 if (record->type & ARM_SPE_L1D_ACCESS) { 1066 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, 1067 data_src); 1068 if (err) 1069 return err; 1070 } 1071 } 1072 
	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	/* The branch stack is shared by branch and instruction samples */
	if (spe->synth_opts.last_branch &&
	    (spe->sample_branch || spe->sample_instructions))
		arm_spe__prep_branch_stack(speq);

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && is_mem_op(record->op)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Decode the queue's trace data and synthesize samples until either the
 * data is exhausted or (in timed mode) decoding runs past *timestamp.
 * Returns 1 when the queue is drained, 0 when bailing out at *timestamp,
 * negative error otherwise.
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	/* Lazily resolve the kernel start address for address attribution */
	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is firstly to decode the packets, and then
		 *
based on the record to synthesize sample; but here the flow is
		 * reversed: it calls arm_spe_sample() for synthesizing samples
		 * prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
		 * has decoded trace data and generated a record, but the record
		 * is left to generate sample until run to here, so it's correct
		 * to synthesize sample for the left record.
		 * 2. After decoding trace data, it needs to compare the record
		 * timestamp with the coming perf event, if the record timestamp
		 * is later than the perf event, it needs bail out and pushes the
		 * record into auxtrace heap, thus the record can be deferred to
		 * synthesize sample until run to here at the next time; so this
		 * can correlate samples between Arm SPE trace data and other
		 * perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			/* A valid CONTEXT packet means PIDs come from the trace */
			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			/* Positive return tells the caller this queue is done */
			return 1;
		}

		/*
		 * Error is detected when decode SPE trace data, continue to
		 * the next trace data and find out more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than timestamp of the
		 * coming perf event, bail out so can allow the perf event to
		 * be processed ahead.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

/*
 * Allocate per-queue decoder state the first time a queue with data is
 * seen, and (for timed decoding) prime the auxtrace heap with the queue's
 * first record timestamp.
 */
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	/* Nothing to do for an empty or already-initialized queue */
	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		/* Timeless decoding does not order queues by timestamp */
		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		/* Skip over decode errors until a record is produced */
		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

/* Run arm_spe__setup_queue() over every queue */
static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

/* Set up any queues that have received new data since the last call */
static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

/*
 * Timeless decoding is used when no traced event carries a timestamp:
 * records are then replayed per-thread instead of time-ordered.
 */
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Circle through the list of events and complain if we find one
	 * with the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

/*
 * Process all queues in timestamp order, stopping once every queued
 * record is at or beyond 'timestamp'.
 */
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		/* Earliest queue has caught up with the perf event stream */
		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		/*
		 * Decode no further than just past the next queue's first
		 * record, capped at 'timestamp', to keep the queues
		 * interleaved in time order.
		 */
		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the machine's context, so
		 * here we need to update the pid/tid in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			/* Re-add the queue so a later flush can retry it */
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			/* More data pending: keep the queue on the heap */
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

/*
 * Drain queues that have no timestamps: replay every queue matching 'tid'
 * (or all queues when tid == -1) at the given sample time.
 */
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

/*
 * Track context switches so that PIDs can still be attributed when the
 * trace contains no CONTEXT packets.
 */
static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	/* Only the switch-out half carries the next task's pid/tid */
	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

/* Main perf event callback: drive decoding up to each event's timestamp */
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if
 (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		/* In timeless mode, flush a task's queues when it exits */
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
							      event->fork.tid,
							      sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		/* Fall back to switch events when CONTEXT packets are absent */
		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

/* Queue incoming AUXTRACE data; dump it immediately for piped traces */
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

/* Flush: decode all remaining queued trace data at end of session */
static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe =
container_of(session->auxtrace, struct arm_spe, 1492 auxtrace); 1493 int ret; 1494 1495 if (dump_trace) 1496 return 0; 1497 1498 if (!tool->ordered_events) 1499 return -EINVAL; 1500 1501 ret = arm_spe__update_queues(spe); 1502 if (ret < 0) 1503 return ret; 1504 1505 if (spe->timeless_decoding) 1506 return arm_spe_process_timeless_queues(spe, -1, 1507 MAX_TIMESTAMP - 1); 1508 1509 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 1510 if (ret) 1511 return ret; 1512 1513 if (!spe->use_ctx_pkt_for_pid) 1514 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 1515 "Matching of TIDs to SPE events could be inaccurate.\n"); 1516 1517 return 0; 1518 } 1519 1520 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) 1521 { 1522 u64 *metadata; 1523 1524 metadata = zalloc(per_cpu_size); 1525 if (!metadata) 1526 return NULL; 1527 1528 memcpy(metadata, buf, per_cpu_size); 1529 return metadata; 1530 } 1531 1532 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) 1533 { 1534 int i; 1535 1536 for (i = 0; i < nr_cpu; i++) 1537 zfree(&metadata[i]); 1538 free(metadata); 1539 } 1540 1541 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, 1542 u64 *ver, int *nr_cpu) 1543 { 1544 u64 *ptr = (u64 *)info->priv; 1545 u64 metadata_size; 1546 u64 **metadata = NULL; 1547 int hdr_sz, per_cpu_sz, i; 1548 1549 metadata_size = info->header.size - 1550 sizeof(struct perf_record_auxtrace_info); 1551 1552 /* Metadata version 1 */ 1553 if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { 1554 *ver = 1; 1555 *nr_cpu = 0; 1556 /* No per CPU metadata */ 1557 return NULL; 1558 } 1559 1560 *ver = ptr[ARM_SPE_HEADER_VERSION]; 1561 hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; 1562 *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; 1563 1564 metadata = calloc(*nr_cpu, sizeof(*metadata)); 1565 if (!metadata) 1566 return NULL; 1567 1568 /* Locate the start address of per CPU metadata */ 1569 ptr += hdr_sz; 1570 per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / 
(*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

/* Release one queue's decoder state */
static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	zfree(&speq->last_branch);
	free(speq);
}

/* Free every queue and the queue array itself */
static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

/* Tear down the whole auxtrace instance */
static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

/* An evsel belongs to this auxtrace instance if it uses the SPE PMU type */
static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

/* Header field formats, metadata version 1 */
static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
};

/* Header field formats, metadata version 2 and later */
static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
};

/* Per-CPU metadata field formats */
static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
	[ARM_SPE_CAP_EVENT_FILTER]	= "    Event Filter     :0x%"PRIx64"\n",
};

/* Dump the auxtrace info header and per-CPU metadata when dump_trace is set */
static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The sequential parameter size is decided by the
		 * field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

/* Rename the synthesized evsel whose first sample id matches 'id' */
static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

/*
 * Synthesize one perf event attribute per requested itrace sample type
 * (cache, TLB, branch, remote access, memory, instructions), remembering
 * each synthesized sample id for use at sample time.
 */
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	/* Inherit privilege filtering from the recorded SPE event */
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all =
evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;
	attr.sample_period = spe->synth_opts.period;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.last_branch) {
		/* SPE can only reconstruct a two-entry stack: PBT + target */
		if (spe->synth_opts.last_branch_sz > 2)
			pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");

		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_id = id;
		arm_spe_set_event_name(evlist, id, "branch");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;

		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}

	return 0;
}

/* CPUs are homogeneous when every per-CPU metadata has the same MIDR */
static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
	u64 midr;
	int i;

	if (!nr_cpu)
		return false;

	for (i = 0; i < nr_cpu; i++) {
		if (!metadata[i])
			return false;

		/* First CPU sets the reference MIDR */
		if (i == 0) {
			midr = metadata[i][ARM_SPE_CPU_MIDR];
			continue;
		}

		if (midr !=
 metadata[i][ARM_SPE_CPU_MIDR])
			return false;
	}

	return true;
}

/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: parse the SPE metadata,
 * create the decoder instance and register the auxtrace callbacks.
 */
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	u64 **metadata = NULL;
	u64 metadata_ver;
	int nr_cpu, err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
					   &nr_cpu);
	/* Version 1 metadata legitimately has no per-CPU table */
	if (!metadata && metadata_ver != 1) {
		pr_err("Failed to parse Arm SPE metadata.\n");
		return -EINVAL;
	}

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	if (metadata_ver == 1)
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	else
		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
	spe->metadata = metadata;
	spe->metadata_ver = metadata_ver;
	spe->metadata_nr_cpu = nr_cpu;
	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
	 * and the parameters for hardware clock are stored in the session
	 * context. Passes these parameters to the struct perf_tsc_conversion
	 * in "spe->tc", which is used for later conversion between clock
	 * counter and timestamp.
	 *
	 * For backward compatibility, copies the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(spe, &auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		spe->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&spe->synth_opts, false);
		/* Default nanoseconds period not supported */
		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
		spe->synth_opts.period = 1;
	}

	/* SPE can only downsample by sample count, expressed as 'i' periods */
	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
		ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n");
		err = -EINVAL;
		goto err_free_queues;
	}
	if (spe->synth_opts.period > 1)
		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
err_free_metadata:
	arm_spe__free_metadata(metadata, nr_cpu);
	return err;
}