// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <time.h>

#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/header.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray

#define ARM_SPE_CPU_MAGIC		0x1010101010101010ULL

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)

struct arm_spe_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*arm_spe_pmu;
	struct evlist			*evlist;
	int				wrapped_cnt;
	bool				*wrapped;
};

/*
 * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
 * perf_cpu_map__put() to release the map after use.
 */
static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
{
	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
	struct perf_cpu_map *intersect_cpus;

	/* cpu map is not "any" CPU, we have specific CPUs to work with */
	if (!perf_cpu_map__has_any_cpu(event_cpus)) {
		intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
		perf_cpu_map__put(online_cpus);
	/* Event can be "any" CPU so count all CPUs. */
	} else {
		intersect_cpus = online_cpus;
	}

	return intersect_cpus;
}

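/*
 * Size (in bytes) of the private data that arm_spe_info_fill() writes into
 * the auxtrace info record: the common header fields plus one block of
 * per-CPU parameters for every CPU in the trace.
 */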
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
		       struct evlist *evlist)
{
	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
	size_t size;

	if (!cpu_map)
		return 0;

	size = ARM_SPE_AUXTRACE_PRIV_MAX +
	       ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
	size *= sizeof(u64);

	perf_cpu_map__put(cpu_map);
	return size;
}

static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
				   struct perf_cpu cpu, __u64 data[])
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *pmu = NULL;
	char *cpuid = NULL;
	u64 val;

	/* Read CPU MIDR */
	cpuid = get_cpuid_allow_env_override(cpu);
	if (!cpuid)
		return -ENOMEM;
	val = strtol(cpuid, NULL, 16);

	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
	data[ARM_SPE_CPU] = cpu.cpu;
	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
	data[ARM_SPE_CPU_MIDR] = val;

	/* Find the associated Arm SPE PMU for the CPU */
	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
		pmu = sper->arm_spe_pmu;

	if (!pmu) {
		/* No Arm SPE PMU was found */
		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
		data[ARM_SPE_CAP_MIN_IVAL] = 0;
	} else {
		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;

		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
			val = 0;
		data[ARM_SPE_CAP_MIN_IVAL] = val;
	}

	free(cpuid);
	return ARM_SPE_CPU_PRIV_MAX;
}

static int arm_spe_info_fill(struct auxtrace_record *itr,
			     struct perf_session *session,
			     struct perf_record_auxtrace_info *auxtrace_info,
			     size_t priv_size)
{
	int i, ret;
	size_t offset;
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
	struct perf_cpu_map *cpu_map;
	struct perf_cpu cpu;
	__u64 *data;

	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
		return -EINVAL;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	cpu_map = arm_spe_find_cpus(session->evlist);
	if (!cpu_map)
		return -EINVAL;

	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
	auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);

	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
		assert(offset < priv_size);
		data = &auxtrace_info->priv[offset];
		ret = arm_spe_save_cpu_header(itr, cpu, data);
		if (ret < 0)
			goto out;
		offset += ret;
	}

	ret = 0;
out:
	perf_cpu_map__put(cpu_map);
	return ret;
}

static void
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
					   bool privileged)
{
	/*
	 * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
	 * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
	 * unprivileged users.
	 *
	 * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
	 * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
	 * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
	 * user is likely to get an error as they exceed their mlock limit.
	 */

	/*
	 * No size was given to '-S' or '-m,', so go with the default
	 */
	if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
		opts->mmap_pages = KiB(256) / page_size;
	}

	/*
	 * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
	 * auxtrace mmap area.
	 */
	if (!opts->auxtrace_snapshot_size)
		opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;

	/*
	 * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
	 * enough to fit the requested snapshot size.
	 */
	if (!opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_snapshot_size;

		sz = round_up(sz, page_size) / page_size;
		opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
	}
}

static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
{
	static __u64 sample_period;

	if (sample_period)
		return sample_period;

	/*
	 * If the kernel driver doesn't advertise a minimum,
	 * use the maximum allowed by PMSIDR_EL1.INTERVAL
	 */
	if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
				&sample_period) != 1) {
		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
		sample_period = 4096;
	}
	return sample_period;
}

static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
{
	u64 bit;

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
	evsel->needs_auxtrace_mmap = true;

	/*
	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
	 * must come first.
	 */
	evlist__to_front(evsel->evlist, evsel);

	/*
	 * In the case of per-cpu mmaps, sample the CPU for the AUX event;
	 * also enable timestamp tracing for sample correlation.
	 */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(evsel, CPU);
		evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
	}

	/*
	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
	 * on the opening of the event or the SPE data produced.
	 */
	evsel__set_sample_bit(evsel, DATA_SRC);

	/*
	 * The PHYS_ADDR flag does not affect the driver behaviour; it is used to
	 * indicate that the resulting output's SPE samples contain physical addresses
	 * where applicable.
	 */
	bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
	if (evsel->core.attr.config & bit)
		evsel__set_sample_bit(evsel, PHYS_ADDR);
}

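/*
 * Validate and complete the recording options: check that any auxtrace event
 * really is an Arm SPE event, resolve the AUX buffer and snapshot sizes,
 * configure each SPE event, and add a dummy tracking event for sideband
 * records (mmap, comm, context switches).
 */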
static int arm_spe_recording_options(struct auxtrace_record *itr,
				     struct evlist *evlist,
				     struct record_opts *opts)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel, *tmp;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
	bool privileged = perf_event_paranoid_check(-1);
	struct evsel *tracking_evsel;
	int err;

	sper->evlist = evlist;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
				pr_err("Found unexpected auxtrace event: %s\n",
				       evsel->pmu->name);
				return -EINVAL;
			}
			opts->full_auxtrace = true;
		}
	}

	if (!opts->full_auxtrace)
		return 0;

	/*
	 * We are in snapshot mode.
	 */
	if (opts->auxtrace_snapshot_mode) {
		/*
		 * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
		 * default values.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
			arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);

		/*
		 * Snapshot size can't be bigger than the auxtrace area.
		 */
		if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}

		/*
		 * Something went wrong somewhere - this shouldn't happen.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
	}

	/* We are in full trace mode but '-m,xyz' wasn't specified */
	if (!opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	if (opts->auxtrace_snapshot_mode)
		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
			  opts->auxtrace_snapshot_size);

	evlist__for_each_entry_safe(evlist, tmp, evsel) {
		if (evsel__is_aux_event(evsel))
			arm_spe_setup_evsel(evsel, cpus);
	}

	/* Add a dummy event to keep tracking */
	err = parse_event(evlist, "dummy:u");
	if (err)
		return err;

	tracking_evsel = evlist__last(evlist);
	evlist__set_tracking_event(evlist, tracking_evsel);

	tracking_evsel->core.attr.freq = 0;
	tracking_evsel->core.attr.sample_period = 1;

	/* In the per-cpu case, we always need the time of mmap events etc. */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(tracking_evsel, TIME);
		evsel__set_sample_bit(tracking_evsel, CPU);

		/* also track task context switches */
		if (!record_opts__no_switch_events(opts))
			tracking_evsel->core.attr.context_switch = 1;
	}

	return 0;
}

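/*
 * Parse the snapshot size given with the '-S' option. A missing or empty
 * argument leaves auxtrace_snapshot_size as 0 so that a default is chosen
 * later when the recording options are resolved.
 */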
static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					  struct record_opts *opts,
					  const char *str)
{
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	return 0;
}

static int arm_spe_snapshot_start(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__disable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__enable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
{
	bool *wrapped;
	int cnt = ptr->wrapped_cnt, new_cnt, i;

	/*
	 * No need to allocate, so return early.
	 */
	if (idx < cnt)
		return 0;

	/*
	 * Make ptr->wrapped as big as idx.
	 */
	new_cnt = idx + 1;

	/*
	 * Freed in arm_spe_recording_free().
	 */
	wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
	if (!wrapped)
		return -ENOMEM;

	/*
	 * Initialise the newly allocated values.
	 */
	for (i = cnt; i < new_cnt; i++)
		wrapped[i] = false;

	ptr->wrapped_cnt = new_cnt;
	ptr->wrapped = wrapped;

	return 0;
}

509 */ 510 if (head > watermark) 511 watermark = head; 512 513 /* 514 * Speed things up by using 64 bit transactions (see "u64 *buf" above) 515 */ 516 watermark /= sizeof(u64); 517 buf_size /= sizeof(u64); 518 519 /* 520 * If we find trace data at the end of the ring buffer, head has been there and has 521 * numerically wrapped around at least once. 522 */ 523 for (i = watermark; i < buf_size; i++) 524 if (buf[i]) 525 return true; 526 527 return false; 528 } 529 530 static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, 531 struct auxtrace_mmap *mm, unsigned char *data, 532 u64 *head, u64 *old) 533 { 534 int err; 535 bool wrapped; 536 struct arm_spe_recording *ptr = 537 container_of(itr, struct arm_spe_recording, itr); 538 539 /* 540 * Allocate memory to keep track of wrapping if this is the first 541 * time we deal with this *mm. 542 */ 543 if (idx >= ptr->wrapped_cnt) { 544 err = arm_spe_alloc_wrapped_array(ptr, idx); 545 if (err) 546 return err; 547 } 548 549 /* 550 * Check to see if *head has wrapped around. If it hasn't only the 551 * amount of data between *head and *old is snapshot'ed to avoid 552 * bloating the perf.data file with zeros. But as soon as *head has 553 * wrapped around the entire size of the AUX ring buffer it taken. 554 */ 555 wrapped = ptr->wrapped[idx]; 556 if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { 557 wrapped = true; 558 ptr->wrapped[idx] = true; 559 } 560 561 pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", 562 __func__, idx, (size_t)*old, (size_t)*head, mm->len); 563 564 /* 565 * No wrap has occurred, we can just use *head and *old. 566 */ 567 if (!wrapped) 568 return 0; 569 570 /* 571 * *head has wrapped around - adjust *head and *old to pickup the 572 * entire content of the AUX buffer. 
573 */ 574 if (*head >= mm->len) { 575 *old = *head - mm->len; 576 } else { 577 *head += mm->len; 578 *old = *head - mm->len; 579 } 580 581 return 0; 582 } 583 584 static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) 585 { 586 struct timespec ts; 587 588 clock_gettime(CLOCK_MONOTONIC_RAW, &ts); 589 590 return ts.tv_sec ^ ts.tv_nsec; 591 } 592 593 static void arm_spe_recording_free(struct auxtrace_record *itr) 594 { 595 struct arm_spe_recording *sper = 596 container_of(itr, struct arm_spe_recording, itr); 597 598 zfree(&sper->wrapped); 599 free(sper); 600 } 601 602 struct auxtrace_record *arm_spe_recording_init(int *err, 603 struct perf_pmu *arm_spe_pmu) 604 { 605 struct arm_spe_recording *sper; 606 607 if (!arm_spe_pmu) { 608 *err = -ENODEV; 609 return NULL; 610 } 611 612 sper = zalloc(sizeof(struct arm_spe_recording)); 613 if (!sper) { 614 *err = -ENOMEM; 615 return NULL; 616 } 617 618 sper->arm_spe_pmu = arm_spe_pmu; 619 sper->itr.snapshot_start = arm_spe_snapshot_start; 620 sper->itr.snapshot_finish = arm_spe_snapshot_finish; 621 sper->itr.find_snapshot = arm_spe_find_snapshot; 622 sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; 623 sper->itr.recording_options = arm_spe_recording_options; 624 sper->itr.info_priv_size = arm_spe_info_priv_size; 625 sper->itr.info_fill = arm_spe_info_fill; 626 sper->itr.free = arm_spe_recording_free; 627 sper->itr.reference = arm_spe_reference; 628 sper->itr.read_finish = auxtrace_record__read_finish; 629 sper->itr.alignment = 0; 630 631 *err = 0; 632 return &sper->itr; 633 } 634 635 void 636 arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr) 637 { 638 attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu); 639 } 640