// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <time.h>

#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/header.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray

#define ARM_SPE_CPU_MAGIC	0x1010101010101010ULL

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)

struct arm_spe_recording {
	struct auxtrace_record itr;
	struct perf_pmu *arm_spe_pmu;
	struct evlist *evlist;
	int wrapped_cnt;
	bool *wrapped;
};

/*
 * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
 * perf_cpu_map__put() to release the map after use.
 */
static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
{
	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
	struct perf_cpu_map *intersect_cpus;

	/* The cpu map is not "any" CPU, so we have specific CPUs to work with */
	if (!perf_cpu_map__has_any_cpu(event_cpus)) {
		intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
		perf_cpu_map__put(online_cpus);
	} else {
		/* Event can be "any" CPU so count all online CPUs. */
		intersect_cpus = online_cpus;
	}

	return intersect_cpus;
}

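/*
 * The AUX trace info priv area is a fixed per-session header of
 * ARM_SPE_AUXTRACE_PRIV_MAX u64 entries followed by one block of
 * ARM_SPE_CPU_PRIV_MAX u64 entries for each CPU in the session's cpu map.
 */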
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
		       struct evlist *evlist)
{
	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
	size_t size;

	if (!cpu_map)
		return 0;

	size = ARM_SPE_AUXTRACE_PRIV_MAX +
	       ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
	size *= sizeof(u64);

	perf_cpu_map__put(cpu_map);
	return size;
}

static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
				   struct perf_cpu cpu, __u64 data[])
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *pmu = NULL;
	char *cpuid = NULL;
	u64 val;

	/* Read CPU MIDR */
	cpuid = get_cpuid_allow_env_override(cpu);
	if (!cpuid)
		return -ENOMEM;
	val = strtol(cpuid, NULL, 16);

	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
	data[ARM_SPE_CPU] = cpu.cpu;
	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
	data[ARM_SPE_CPU_MIDR] = val;

	/* Find the associated Arm SPE PMU for the CPU */
	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
		pmu = sper->arm_spe_pmu;

	if (!pmu) {
		/* No Arm SPE PMU was found */
		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
		data[ARM_SPE_CAP_MIN_IVAL] = 0;
	} else {
		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;

		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
			val = 0;
		data[ARM_SPE_CAP_MIN_IVAL] = val;
	}

	free(cpuid);
	return ARM_SPE_CPU_PRIV_MAX;
}

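/*
 * Fill the AUX trace info record: a versioned header (header version and
 * size, SPE PMU type, number of CPUs) followed by one per-CPU block
 * written by arm_spe_save_cpu_header().
 */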
static int arm_spe_info_fill(struct auxtrace_record *itr,
			     struct perf_session *session,
			     struct perf_record_auxtrace_info *auxtrace_info,
			     size_t priv_size)
{
	int i, ret;
	size_t offset;
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
	struct perf_cpu_map *cpu_map;
	struct perf_cpu cpu;
	__u64 *data;

	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
		return -EINVAL;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	cpu_map = arm_spe_find_cpus(session->evlist);
	if (!cpu_map)
		return -EINVAL;

	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
	auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);

	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
		assert(offset < priv_size);
		data = &auxtrace_info->priv[offset];
		ret = arm_spe_save_cpu_header(itr, cpu, data);
		if (ret < 0)
			goto out;
		offset += ret;
	}

	ret = 0;
out:
	perf_cpu_map__put(cpu_map);
	return ret;
}

static void
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
					   bool privileged)
{
	/*
	 * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
	 * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
	 * unprivileged users.
	 *
	 * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
	 * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
	 * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
	 * user is likely to get an error as they exceed their mlock limit.
	 */

	/*
	 * No size was given to '-S' or '-m,', so go with the default.
	 */
	if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
		opts->mmap_pages = KiB(256) / page_size;
	}

	/*
	 * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
	 * auxtrace mmap area.
	 */
	if (!opts->auxtrace_snapshot_size)
		opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;

	/*
	 * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
	 * enough to fit the requested snapshot size.
	 */
	if (!opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_snapshot_size;

		sz = round_up(sz, page_size) / page_size;
		opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
	}
}

static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
{
	static __u64 sample_period;

	if (sample_period)
		return sample_period;

	/*
	 * If the kernel driver doesn't advertise a minimum, use the maximum
	 * allowable by PMSIDR_EL1.INTERVAL.
	 */
	if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
				&sample_period) != 1) {
		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
		sample_period = 4096;
	}
	return sample_period;
}

static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
{
	u64 bit;

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
	evsel->needs_auxtrace_mmap = true;

	/*
	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
	 * must come first.
	 */
	evlist__to_front(evsel->evlist, evsel);

	/*
	 * In the case of per-cpu mmaps, sample CPU for the AUX event; also
	 * enable timestamp tracing for sample correlation.
	 */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(evsel, CPU);
		evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
	}

	/*
	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
	 * on the opening of the event or the SPE data produced.
	 */
	evsel__set_sample_bit(evsel, DATA_SRC);

	/*
	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
	 * inform that the resulting output's SPE samples contain physical addresses
	 * where applicable.
	 */
	bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
	if (evsel->core.attr.config & bit)
		evsel__set_sample_bit(evsel, PHYS_ADDR);
}

static int arm_spe_setup_aux_buffer(struct record_opts *opts)
{
	bool privileged = perf_event_paranoid_check(-1);

	/*
	 * We are in snapshot mode.
	 */
	if (opts->auxtrace_snapshot_mode) {
		/*
		 * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
		 * default values.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
			arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);

		/*
		 * Snapshot size can't be bigger than the auxtrace area.
		 */
		if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}

		/*
		 * Something went wrong somewhere - this shouldn't happen.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}

		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
			  opts->auxtrace_snapshot_size);
	}

	/* We are in full trace mode but '-m,xyz' wasn't specified */
	if (!opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	return 0;
}

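/*
 * Add a dummy tracking event so that side-band records (mmap, comm and,
 * unless switch events are disabled, context switches) are captured
 * alongside the SPE AUX trace.
 */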
static int arm_spe_setup_tracking_event(struct evlist *evlist,
					struct record_opts *opts)
{
	int err;
	struct evsel *tracking_evsel;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;

	/* Add dummy event to keep tracking */
	err = parse_event(evlist, "dummy:u");
	if (err)
		return err;

	tracking_evsel = evlist__last(evlist);
	evlist__set_tracking_event(evlist, tracking_evsel);

	tracking_evsel->core.attr.freq = 0;
	tracking_evsel->core.attr.sample_period = 1;

	/* In the per-cpu case, always need the time of mmap events etc */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(tracking_evsel, TIME);
		evsel__set_sample_bit(tracking_evsel, CPU);

		/* also track task context switches */
		if (!record_opts__no_switch_events(opts))
			tracking_evsel->core.attr.context_switch = 1;
	}

	return 0;
}

static int arm_spe_recording_options(struct auxtrace_record *itr,
				     struct evlist *evlist,
				     struct record_opts *opts)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel, *tmp;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
	bool discard = false;
	int err;

	sper->evlist = evlist;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
				pr_err("Found unexpected auxtrace event: %s\n",
				       evsel->pmu->name);
				return -EINVAL;
			}
			opts->full_auxtrace = true;
		}
	}

	if (!opts->full_auxtrace)
		return 0;

	evlist__for_each_entry_safe(evlist, tmp, evsel) {
		if (evsel__is_aux_event(evsel)) {
			arm_spe_setup_evsel(evsel, cpus);
			if (evsel->core.attr.config &
			    perf_pmu__format_bits(evsel->pmu, "discard"))
				discard = true;
		}
	}

	if (discard)
		return 0;

	err = arm_spe_setup_aux_buffer(opts);
	if (err)
		return err;

	return arm_spe_setup_tracking_event(evlist, opts);
}

static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					  struct record_opts *opts,
					  const char *str)
{
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	return 0;
}

static int arm_spe_snapshot_start(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__disable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__enable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
{
	bool *wrapped;
	int cnt = ptr->wrapped_cnt, new_cnt, i;

	/*
	 * No need to allocate, so return early.
	 */
	if (idx < cnt)
		return 0;

	/*
	 * Make ptr->wrapped as big as idx.
	 */
	new_cnt = idx + 1;

	/*
	 * Freed in arm_spe_recording_free().
	 */
	wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
	if (!wrapped)
		return -ENOMEM;

	/*
	 * Initialize the newly allocated entries.
	 */
	for (i = cnt; i < new_cnt; i++)
		wrapped[i] = false;

	ptr->wrapped_cnt = new_cnt;
	ptr->wrapped = wrapped;

	return 0;
}

static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
				       size_t buffer_size, u64 head)
{
	u64 i, watermark;
	u64 *buf = (u64 *)buffer;
	size_t buf_size = buffer_size;

	/*
	 * Defensively handle the case where head might be continually increasing - if its value is
	 * equal or greater than the size of the ring buffer, then we can safely determine it has
	 * wrapped around. Otherwise, continue to detect if head might have wrapped.
	 */
	if (head >= buffer_size)
		return true;

	/*
	 * We want to look at the very last 512 bytes (chosen arbitrarily) of the ring buffer.
	 */
	watermark = buf_size - 512;

	/*
	 * The value of head is somewhere within the size of the ring buffer. This can mean that
	 * there hasn't been enough data to fill the ring buffer yet or the trace time was so long
	 * that head has numerically wrapped around. To find out, we need to check if we have data
	 * at the very end of the ring buffer. We can reliably do this because mmap'ed pages are
	 * zeroed out and there is a fresh mapping with every new session.
	 */

	/*
	 * head is less than 512 bytes from the end of the ring buffer.
	 */
	if (head > watermark)
		watermark = head;

	/*
	 * Speed things up by using 64 bit transactions (see "u64 *buf" above).
	 */
	watermark /= sizeof(u64);
	buf_size /= sizeof(u64);

	/*
	 * If we find trace data at the end of the ring buffer, head has been there and has
	 * numerically wrapped around at least once.
	 */
	for (i = watermark; i < buf_size; i++)
		if (buf[i])
			return true;

	return false;
}

static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
				 struct auxtrace_mmap *mm, unsigned char *data,
				 u64 *head, u64 *old)
{
	int err;
	bool wrapped;
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);

	/*
	 * Allocate memory to keep track of wrapping if this is the first
	 * time we deal with this *mm.
	 */
	if (idx >= ptr->wrapped_cnt) {
		err = arm_spe_alloc_wrapped_array(ptr, idx);
		if (err)
			return err;
	}

	/*
	 * Check to see if *head has wrapped around. If it hasn't, only the
	 * amount of data between *head and *old is snapshot'ed to avoid
	 * bloating the perf.data file with zeros. But as soon as *head has
	 * wrapped around, the entire size of the AUX ring buffer is taken.
	 */
	wrapped = ptr->wrapped[idx];
	if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
		wrapped = true;
		ptr->wrapped[idx] = true;
	}

	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);

	/*
	 * No wrap has occurred, we can just use *head and *old.
	 */
	if (!wrapped)
		return 0;

	/*
	 * *head has wrapped around - adjust *head and *old to pick up the
	 * entire content of the AUX buffer.
	 */
	if (*head >= mm->len) {
		*old = *head - mm->len;
	} else {
		*head += mm->len;
		*old = *head - mm->len;
	}

	return 0;
}

static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);

	return ts.tv_sec ^ ts.tv_nsec;
}

static void arm_spe_recording_free(struct auxtrace_record *itr)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);

	zfree(&sper->wrapped);
	free(sper);
}

struct auxtrace_record *arm_spe_recording_init(int *err,
					       struct perf_pmu *arm_spe_pmu)
{
	struct arm_spe_recording *sper;

	if (!arm_spe_pmu) {
		*err = -ENODEV;
		return NULL;
	}

	sper = zalloc(sizeof(struct arm_spe_recording));
	if (!sper) {
		*err = -ENOMEM;
		return NULL;
	}

	sper->arm_spe_pmu = arm_spe_pmu;
	sper->itr.snapshot_start = arm_spe_snapshot_start;
	sper->itr.snapshot_finish = arm_spe_snapshot_finish;
	sper->itr.find_snapshot = arm_spe_find_snapshot;
	sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
	sper->itr.recording_options = arm_spe_recording_options;
	sper->itr.info_priv_size = arm_spe_info_priv_size;
	sper->itr.info_fill = arm_spe_info_fill;
	sper->itr.free = arm_spe_recording_free;
	sper->itr.reference = arm_spe_reference;
	sper->itr.read_finish = auxtrace_record__read_finish;
	sper->itr.alignment = 0;

	*err = 0;
	return &sper->itr;
}

void
arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
{
	attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
}