// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <time.h>

#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray

#define ARM_SPE_CPU_MAGIC	0x1010101010101010ULL

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)

struct arm_spe_recording {
	struct auxtrace_record	itr;
	struct perf_pmu		*arm_spe_pmu;
	struct evlist		*evlist;
	int			wrapped_cnt;
	bool			*wrapped;
};

/*
 * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
 * perf_cpu_map__put() to release the map after use.
 */
static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
{
	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
	struct perf_cpu_map *intersect_cpus;

	/* cpu map is not "any" CPU, we have specific CPUs to work with */
	if (!perf_cpu_map__has_any_cpu(event_cpus)) {
		intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
		perf_cpu_map__put(online_cpus);
	/* Event can be "any" CPU so count all CPUs. */
	} else {
		intersect_cpus = online_cpus;
	}

	return intersect_cpus;
}

static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
		       struct evlist *evlist)
{
	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
	size_t size;

	if (!cpu_map)
		return 0;

	size = ARM_SPE_AUXTRACE_PRIV_MAX +
	       ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
	size *= sizeof(u64);

	perf_cpu_map__put(cpu_map);
	return size;
}

static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
				   struct perf_cpu cpu, __u64 data[])
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *pmu = NULL;
	struct perf_pmu tmp_pmu;
	char cpu_id_str[16];
	char *cpuid = NULL;
	u64 val;

	snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
	tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
	if (!tmp_pmu.cpus)
		return -ENOMEM;

	/* Read CPU MIDR */
	cpuid = perf_pmu__getcpuid(&tmp_pmu);

	/* The CPU map will not be used anymore, release it */
	perf_cpu_map__put(tmp_pmu.cpus);

	if (!cpuid)
		return -ENOMEM;
	val = strtol(cpuid, NULL, 16);

	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
	data[ARM_SPE_CPU] = cpu.cpu;
	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
	data[ARM_SPE_CPU_MIDR] = val;

	/* Find the associated Arm SPE PMU for the CPU */
	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
		pmu = sper->arm_spe_pmu;

	if (!pmu) {
		/* No Arm SPE PMU is found */
		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
		data[ARM_SPE_CAP_MIN_IVAL] = 0;
	} else {
		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;

		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
			val = 0;
		data[ARM_SPE_CAP_MIN_IVAL] = val;
	}

	free(cpuid);
	return ARM_SPE_CPU_PRIV_MAX;
}

static int arm_spe_info_fill(struct auxtrace_record *itr,
			     struct perf_session *session,
			     struct perf_record_auxtrace_info *auxtrace_info,
			     size_t priv_size)
{
	int i, ret;
	size_t offset;
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
	struct perf_cpu_map *cpu_map;
	struct perf_cpu cpu;
	__u64 *data;

	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
		return -EINVAL;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	cpu_map = arm_spe_find_cpus(session->evlist);
	if (!cpu_map)
		return -EINVAL;

	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
	auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);

	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
		assert(offset < priv_size);
		data = &auxtrace_info->priv[offset];
		ret = arm_spe_save_cpu_header(itr, cpu, data);
		if (ret < 0)
			goto out;
		offset += ret;
	}

	ret = 0;
out:
	perf_cpu_map__put(cpu_map);
	return ret;
}

static void
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
					   bool privileged)
{
	/*
	 * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
	 * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
	 * unprivileged users.
	 *
	 * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
	 * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
	 * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
	 * user is likely to get an error as they exceed their mlock limit.
	 */

	/*
	 * No size was given to '-S' or '-m,', so go with the default
	 */
	if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
		opts->mmap_pages = KiB(256) / page_size;
	}

	/*
	 * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
	 * auxtrace mmap area.
	 */
	if (!opts->auxtrace_snapshot_size)
		opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;

	/*
	 * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
	 * enough to fit the requested snapshot size.
	 */
	if (!opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_snapshot_size;

		sz = round_up(sz, page_size) / page_size;
		opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
	}
}

static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
{
	static __u64 sample_period;

	if (sample_period)
		return sample_period;

	/*
	 * If the kernel driver doesn't advertise a minimum,
	 * use the max allowable by PMSIDR_EL1.INTERVAL
	 */
	if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
				&sample_period) != 1) {
		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
		sample_period = 4096;
	}
	return sample_period;
}

static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
{
	u64 bit;

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
	evsel->needs_auxtrace_mmap = true;

	/*
	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
	 * must come first.
	 */
	evlist__to_front(evsel->evlist, evsel);

	/*
	 * In the case of per-cpu mmaps, sample the CPU for the AUX event;
	 * also enable timestamp tracing for sample correlation.
	 */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(evsel, CPU);
		evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
	}

	/*
	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
	 * on the opening of the event or the SPE data produced.
	 */
	evsel__set_sample_bit(evsel, DATA_SRC);

	/*
	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
	 * inform that the resulting output's SPE samples contain physical addresses
	 * where applicable.
	 */
	bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
	if (evsel->core.attr.config & bit)
		evsel__set_sample_bit(evsel, PHYS_ADDR);
}

static int arm_spe_recording_options(struct auxtrace_record *itr,
				     struct evlist *evlist,
				     struct record_opts *opts)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel, *tmp;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
	bool privileged = perf_event_paranoid_check(-1);
	struct evsel *tracking_evsel;
	int err;

	sper->evlist = evlist;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
				pr_err("Found unexpected auxtrace event: %s\n",
				       evsel->pmu->name);
				return -EINVAL;
			}
			opts->full_auxtrace = true;
		}
	}

	if (!opts->full_auxtrace)
		return 0;

	/*
	 * We are in snapshot mode.
	 */
	if (opts->auxtrace_snapshot_mode) {
		/*
		 * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
		 * default values.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
			arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);

		/*
		 * Snapshot size can't be bigger than the auxtrace area.
		 */
		if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}

		/*
		 * Something went wrong somewhere - this shouldn't happen.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
	}

	/* We are in full trace mode but '-m,xyz' wasn't specified */
	if (!opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	if (opts->auxtrace_snapshot_mode)
		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
			  opts->auxtrace_snapshot_size);

	evlist__for_each_entry_safe(evlist, tmp, evsel) {
		if (evsel__is_aux_event(evsel))
			arm_spe_setup_evsel(evsel, cpus);
	}

	/* Add dummy event to keep tracking */
	err = parse_event(evlist, "dummy:u");
	if (err)
		return err;

	tracking_evsel = evlist__last(evlist);
	evlist__set_tracking_event(evlist, tracking_evsel);

	tracking_evsel->core.attr.freq = 0;
	tracking_evsel->core.attr.sample_period = 1;

	/* In per-cpu case, always need the time of mmap events etc */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(tracking_evsel, TIME);
		evsel__set_sample_bit(tracking_evsel, CPU);

		/* also track task context switch */
		if (!record_opts__no_switch_events(opts))
			tracking_evsel->core.attr.context_switch = 1;
	}

	return 0;
}

static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					  struct record_opts *opts,
					  const char *str)
{
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	return 0;
}

static int arm_spe_snapshot_start(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__disable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__enable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
{
	bool *wrapped;
	int cnt = ptr->wrapped_cnt, new_cnt, i;

	/*
	 * No need to allocate, so return early.
	 */
	if (idx < cnt)
		return 0;

	/*
	 * Make ptr->wrapped as big as idx.
	 */
	new_cnt = idx + 1;

	/*
	 * Freed in arm_spe_recording_free().
	 */
	wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
	if (!wrapped)
		return -ENOMEM;

	/*
	 * Init the newly allocated values.
	 */
	for (i = cnt; i < new_cnt; i++)
		wrapped[i] = false;

	ptr->wrapped_cnt = new_cnt;
	ptr->wrapped = wrapped;

	return 0;
}

static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
				       size_t buffer_size, u64 head)
{
	u64 i, watermark;
	u64 *buf = (u64 *)buffer;
	size_t buf_size = buffer_size;

	/*
	 * Defensively handle the case where head might be continually increasing - if its value is
	 * equal to or greater than the size of the ring buffer, then we can safely determine it has
	 * wrapped around. Otherwise, continue to detect if head might have wrapped.
	 */
	if (head >= buffer_size)
		return true;

	/*
	 * We want to look at the very last 512 bytes (chosen arbitrarily) in the ring buffer.
	 */
	watermark = buf_size - 512;

	/*
	 * The value of head is somewhere within the size of the ring buffer. This can mean that
	 * there hasn't been enough data to fill the ring buffer yet, or that the trace time was so
	 * long that head has numerically wrapped around. To find out, we need to check whether we
	 * have data at the very end of the ring buffer. We can reliably do this because mmap'ed
	 * pages are zeroed out and there is a fresh mapping with every new session.
	 */

	/*
	 * head is less than 512 bytes from the end of the ring buffer.
	 */
	if (head > watermark)
		watermark = head;

	/*
	 * Speed things up by using 64 bit transactions (see "u64 *buf" above)
	 */
	watermark /= sizeof(u64);
	buf_size /= sizeof(u64);

	/*
	 * If we find trace data at the end of the ring buffer, head has been there and has
	 * numerically wrapped around at least once.
	 */
	for (i = watermark; i < buf_size; i++)
		if (buf[i])
			return true;

	return false;
}

static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
				 struct auxtrace_mmap *mm, unsigned char *data,
				 u64 *head, u64 *old)
{
	int err;
	bool wrapped;
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);

	/*
	 * Allocate memory to keep track of wrapping if this is the first
	 * time we deal with this *mm.
	 */
	if (idx >= ptr->wrapped_cnt) {
		err = arm_spe_alloc_wrapped_array(ptr, idx);
		if (err)
			return err;
	}

	/*
	 * Check to see if *head has wrapped around. If it hasn't, only the
	 * amount of data between *head and *old is snapshotted to avoid
	 * bloating the perf.data file with zeros. But as soon as *head has
	 * wrapped around, the entire size of the AUX ring buffer is taken.
	 */
	wrapped = ptr->wrapped[idx];
	if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
		wrapped = true;
		ptr->wrapped[idx] = true;
	}

	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);

	/*
	 * No wrap has occurred, we can just use *head and *old.
	 */
	if (!wrapped)
		return 0;

	/*
	 * *head has wrapped around - adjust *head and *old to pick up the
	 * entire content of the AUX buffer.
	 */
	if (*head >= mm->len) {
		*old = *head - mm->len;
	} else {
		*head += mm->len;
		*old = *head - mm->len;
	}

	return 0;
}

static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);

	return ts.tv_sec ^ ts.tv_nsec;
}

static void arm_spe_recording_free(struct auxtrace_record *itr)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);

	zfree(&sper->wrapped);
	free(sper);
}

struct auxtrace_record *arm_spe_recording_init(int *err,
					       struct perf_pmu *arm_spe_pmu)
{
	struct arm_spe_recording *sper;

	if (!arm_spe_pmu) {
		*err = -ENODEV;
		return NULL;
	}

	sper = zalloc(sizeof(struct arm_spe_recording));
	if (!sper) {
		*err = -ENOMEM;
		return NULL;
	}

	sper->arm_spe_pmu = arm_spe_pmu;
	sper->itr.snapshot_start = arm_spe_snapshot_start;
	sper->itr.snapshot_finish = arm_spe_snapshot_finish;
	sper->itr.find_snapshot = arm_spe_find_snapshot;
	sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
	sper->itr.recording_options = arm_spe_recording_options;
	sper->itr.info_priv_size = arm_spe_info_priv_size;
	sper->itr.info_fill = arm_spe_info_fill;
	sper->itr.free = arm_spe_recording_free;
	sper->itr.reference = arm_spe_reference;
	sper->itr.read_finish = auxtrace_record__read_finish;
	sper->itr.alignment = 0;

	*err = 0;
	return &sper->itr;
}

void
arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
{
	attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
}