1 // SPDX-License-Identifier: GPL-2.0-only 2 #include "cgroup.h" 3 #include "counts.h" 4 #include "cputopo.h" 5 #include "evsel.h" 6 #include "pmu.h" 7 #include "print-events.h" 8 #include "smt.h" 9 #include "stat.h" 10 #include "time-utils.h" 11 #include "tool_pmu.h" 12 #include "tsc.h" 13 #include <api/fs/fs.h> 14 #include <api/io.h> 15 #include <internal/threadmap.h> 16 #include <perf/threadmap.h> 17 #include <fcntl.h> 18 #include <strings.h> 19 20 #define INVALID_START_TIME ~0ULL 21 22 static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { 23 NULL, 24 "duration_time", 25 "user_time", 26 "system_time", 27 "has_pmem", 28 "num_cores", 29 "num_cpus", 30 "num_cpus_online", 31 "num_dies", 32 "num_packages", 33 "slots", 34 "smt_on", 35 "system_tsc_freq", 36 "core_wide", 37 "target_cpu", 38 }; 39 40 bool tool_pmu__skip_event(const char *name __maybe_unused) 41 { 42 #if !defined(__aarch64__) 43 /* The slots event should only appear on arm64. */ 44 if (strcasecmp(name, "slots") == 0) 45 return true; 46 #endif 47 #if !defined(__i386__) && !defined(__x86_64__) 48 /* The system_tsc_freq event should only appear on x86. */ 49 if (strcasecmp(name, "system_tsc_freq") == 0) 50 return true; 51 #endif 52 return false; 53 } 54 55 int tool_pmu__num_skip_events(void) 56 { 57 int num = 0; 58 59 #if !defined(__aarch64__) 60 num++; 61 #endif 62 #if !defined(__i386__) && !defined(__x86_64__) 63 num++; 64 #endif 65 return num; 66 } 67 68 const char *tool_pmu__event_to_str(enum tool_pmu_event ev) 69 { 70 if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) && 71 !tool_pmu__skip_event(tool_pmu__event_names[ev])) 72 return tool_pmu__event_names[ev]; 73 74 return NULL; 75 } 76 77 enum tool_pmu_event tool_pmu__str_to_event(const char *str) 78 { 79 int i; 80 81 if (tool_pmu__skip_event(str)) 82 return TOOL_PMU__EVENT_NONE; 83 84 tool_pmu__for_each_event(i) { 85 if (!strcasecmp(str, tool_pmu__event_names[i])) 86 return i; 87 } 88 return TOOL_PMU__EVENT_NONE; 89 } 90 91 bool perf_pmu__is_tool(const struct perf_pmu *pmu) 92 { 93 return pmu && pmu->type == PERF_PMU_TYPE_TOOL; 94 } 95 96 bool evsel__is_tool(const struct evsel *evsel) 97 { 98 return perf_pmu__is_tool(evsel->pmu); 99 } 100 101 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) 102 { 103 if (!evsel__is_tool(evsel)) 104 return TOOL_PMU__EVENT_NONE; 105 106 return (enum tool_pmu_event)evsel->core.attr.config; 107 } 108 109 const char *evsel__tool_pmu_event_name(const struct evsel *evsel) 110 { 111 return tool_pmu__event_to_str(evsel->core.attr.config); 112 } 113 114 static bool read_until_char(struct io *io, char e) 115 { 116 int c; 117 118 do { 119 c = io__get_char(io); 120 if (c == -1) 121 return false; 122 } while (c != e); 123 return true; 124 } 125 126 static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) 127 { 128 char buf[256]; 129 struct io io; 130 int i; 131 132 io__init(&io, fd, buf, sizeof(buf)); 133 134 /* Skip lines to relevant CPU. */ 135 for (i = -1; i < cpu.cpu; i++) { 136 if (!read_until_char(&io, '\n')) 137 return -EINVAL; 138 } 139 /* Skip to "cpu". */ 140 if (io__get_char(&io) != 'c') return -EINVAL; 141 if (io__get_char(&io) != 'p') return -EINVAL; 142 if (io__get_char(&io) != 'u') return -EINVAL; 143 144 /* Skip N of cpuN. */ 145 if (!read_until_char(&io, ' ')) 146 return -EINVAL; 147 148 i = 1; 149 while (true) { 150 if (io__get_dec(&io, val) != ' ') 151 break; 152 if (field == i) 153 return 0; 154 i++; 155 } 156 return -EINVAL; 157 } 158 159 static int read_pid_stat_field(int fd, int field, __u64 *val) 160 { 161 char buf[256]; 162 struct io io; 163 int c, i; 164 165 io__init(&io, fd, buf, sizeof(buf)); 166 if (io__get_dec(&io, val) != ' ') 167 return -EINVAL; 168 if (field == 1) 169 return 0; 170 171 /* Skip comm. */ 172 if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) 173 return -EINVAL; 174 if (field == 2) 175 return -EINVAL; /* String can't be returned. */ 176 177 /* Skip state */ 178 if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) 179 return -EINVAL; 180 if (field == 3) 181 return -EINVAL; /* String can't be returned. */ 182 183 /* Loop over numeric fields*/ 184 if (io__get_char(&io) != ' ') 185 return -EINVAL; 186 187 i = 4; 188 while (true) { 189 c = io__get_dec(&io, val); 190 if (c == -1) 191 return -EINVAL; 192 if (c == -2) { 193 /* Assume a -ve was read */ 194 c = io__get_dec(&io, val); 195 *val *= -1; 196 } 197 if (c != ' ') 198 return -EINVAL; 199 if (field == i) 200 return 0; 201 i++; 202 } 203 return -EINVAL; 204 } 205 206 int evsel__tool_pmu_prepare_open(struct evsel *evsel, 207 struct perf_cpu_map *cpus, 208 int nthreads) 209 { 210 enum tool_pmu_event ev = evsel__tool_event(evsel); 211 212 if (ev == TOOL_PMU__EVENT_SYSTEM_TIME || ev == TOOL_PMU__EVENT_USER_TIME) { 213 if (!evsel->process_time.start_times) { 214 evsel->process_time.start_times = 215 xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); 216 if (!evsel->process_time.start_times) 217 return -ENOMEM; 218 } 219 if (!evsel->process_time.accumulated_times) { 220 evsel->process_time.accumulated_times = 221 xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); 222 if (!evsel->process_time.accumulated_times) 223 return -ENOMEM; 224 } 225 } 226 return 0; 227 } 228 229 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) 230 231 static int tool_pmu__read_stat(struct evsel *evsel, int cpu_map_idx, int thread, __u64 *val) 232 { 233 enum tool_pmu_event ev = evsel__tool_event(evsel); 234 bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; 235 int fd = FD(evsel, cpu_map_idx, thread); 236 int err = 0; 237 238 if (fd < 0) { 239 *val = 0; 240 return 0; 241 } 242 243 lseek(fd, 0, SEEK_SET); 244 if (evsel->pid_stat) { 245 if (cpu_map_idx == 0) 246 err = read_pid_stat_field(fd, system ? 15 : 14, val); 247 else 248 *val = 0; 249 } else { 250 if (thread == 0) { 251 struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); 252 253 err = read_stat_field(fd, cpu, system ? 3 : 1, val); 254 } else { 255 *val = 0; 256 } 257 } 258 return err; 259 } 260 261 int evsel__tool_pmu_open(struct evsel *evsel, 262 struct perf_thread_map *threads, 263 int start_cpu_map_idx, int end_cpu_map_idx) 264 { 265 enum tool_pmu_event ev = evsel__tool_event(evsel); 266 int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; 267 268 if (ev == TOOL_PMU__EVENT_NUM_CPUS) 269 return 0; 270 271 if (ev == TOOL_PMU__EVENT_DURATION_TIME) { 272 if (evsel->core.attr.sample_period) /* no sampling */ 273 return -EINVAL; 274 evsel->duration_time.accumulated_time = 0; 275 if (evsel->core.attr.disabled) { 276 evsel->disabled = true; 277 evsel->duration_time.start_time = INVALID_START_TIME; 278 } else { 279 evsel->disabled = false; 280 evsel->duration_time.start_time = rdclock(); 281 } 282 return 0; 283 } 284 285 if (evsel->cgrp) 286 pid = evsel->cgrp->fd; 287 288 nthreads = perf_thread_map__nr(threads); 289 for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { 290 for (thread = 0; thread < nthreads; thread++) { 291 if (!evsel->cgrp && !evsel->core.system_wide) 292 pid = perf_thread_map__pid(threads, thread); 293 294 if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { 295 __u64 *start_time = NULL; 296 __u64 *accumulated_time = NULL; 297 int fd; 298 299 if (evsel->core.attr.sample_period) { 300 /* no sampling */ 301 err = -EINVAL; 302 goto out_close; 303 } 304 if (pid > -1) { 305 char buf[64]; 306 307 snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); 308 fd = open(buf, O_RDONLY); 309 evsel->pid_stat = true; 310 } else { 311 fd = open("/proc/stat", O_RDONLY); 312 } 313 FD(evsel, idx, thread) = fd; 314 if (fd < 0) { 315 err = -errno; 316 goto out_close; 317 } 318 start_time = xyarray__entry(evsel->process_time.start_times, idx, 319 thread); 320 accumulated_time = xyarray__entry( 321 evsel->process_time.accumulated_times, idx, thread); 322 *accumulated_time = 0; 323 324 if (evsel->core.attr.disabled) { 325 evsel->disabled = true; 326 *start_time = INVALID_START_TIME; 327 } else { 328 evsel->disabled = false; 329 err = tool_pmu__read_stat(evsel, idx, thread, start_time); 330 if (err) { 331 close(fd); 332 FD(evsel, idx, thread) = -1; 333 goto out_close; 334 } 335 } 336 } 337 } 338 } 339 return 0; 340 out_close: 341 if (err) 342 threads->err_thread = thread; 343 344 old_errno = errno; 345 do { 346 while (--thread >= 0) { 347 if (FD(evsel, idx, thread) >= 0) 348 close(FD(evsel, idx, thread)); 349 FD(evsel, idx, thread) = -1; 350 } 351 thread = nthreads; 352 } while (--idx >= 0); 353 errno = old_errno; 354 return err; 355 } 356 357 #if !defined(__i386__) && !defined(__x86_64__) 358 u64 arch_get_tsc_freq(void) 359 { 360 return 0; 361 } 362 #endif 363 364 #if !defined(__aarch64__) 365 u64 tool_pmu__cpu_slots_per_cycle(void) 366 { 367 return 0; 368 } 369 #endif 370 371 static bool has_pmem(void) 372 { 373 static bool has_pmem, cached; 374 const char *sysfs = sysfs__mountpoint(); 375 char path[PATH_MAX]; 376 377 if (!cached) { 378 snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); 379 has_pmem = access(path, F_OK) == 0; 380 cached = true; 381 } 382 return has_pmem; 383 } 384 385 bool tool_pmu__read_event(enum tool_pmu_event ev, 386 struct evsel *evsel, 387 bool system_wide, 388 const char *user_requested_cpu_list, 389 u64 *result) 390 { 391 const struct cpu_topology *topology; 392 393 switch (ev) { 394 case TOOL_PMU__EVENT_HAS_PMEM: 395 *result = has_pmem() ? 1 : 0; 396 return true; 397 398 case TOOL_PMU__EVENT_NUM_CORES: 399 topology = online_topology(); 400 *result = topology->core_cpus_lists; 401 return true; 402 403 case TOOL_PMU__EVENT_NUM_CPUS: 404 if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { 405 /* No evsel to be specific to. */ 406 *result = cpu__max_present_cpu().cpu; 407 } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { 408 /* Evsel just has specific CPUs. */ 409 *result = perf_cpu_map__nr(evsel->core.cpus); 410 } else { 411 /* 412 * "Any CPU" event that can be scheduled on any CPU in 413 * the PMU's cpumask. The PMU cpumask should be saved in 414 * pmu_cpus. If not present fall back to max. 415 */ 416 if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) 417 *result = perf_cpu_map__nr(evsel->core.pmu_cpus); 418 else 419 *result = cpu__max_present_cpu().cpu; 420 } 421 return true; 422 423 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { 424 struct perf_cpu_map *online = cpu_map__online(); 425 426 if (!online) 427 return false; 428 429 if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { 430 /* No evsel to be specific to. */ 431 *result = perf_cpu_map__nr(online); 432 } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { 433 /* Evsel just has specific CPUs. */ 434 struct perf_cpu_map *tmp = 435 perf_cpu_map__intersect(online, evsel->core.cpus); 436 437 *result = perf_cpu_map__nr(tmp); 438 perf_cpu_map__put(tmp); 439 } else { 440 /* 441 * "Any CPU" event that can be scheduled on any CPU in 442 * the PMU's cpumask. The PMU cpumask should be saved in 443 * pmu_cpus, if not present then just the online cpu 444 * mask. 445 */ 446 if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) { 447 struct perf_cpu_map *tmp = 448 perf_cpu_map__intersect(online, evsel->core.pmu_cpus); 449 450 *result = perf_cpu_map__nr(tmp); 451 perf_cpu_map__put(tmp); 452 } else { 453 *result = perf_cpu_map__nr(online); 454 } 455 } 456 perf_cpu_map__put(online); 457 return true; 458 } 459 case TOOL_PMU__EVENT_NUM_DIES: 460 topology = online_topology(); 461 *result = topology->die_cpus_lists; 462 return true; 463 464 case TOOL_PMU__EVENT_NUM_PACKAGES: 465 topology = online_topology(); 466 *result = topology->package_cpus_lists; 467 return true; 468 469 case TOOL_PMU__EVENT_SLOTS: 470 *result = tool_pmu__cpu_slots_per_cycle(); 471 return *result ? true : false; 472 473 case TOOL_PMU__EVENT_SMT_ON: 474 *result = smt_on() ? 1 : 0; 475 return true; 476 477 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: 478 *result = arch_get_tsc_freq(); 479 return true; 480 481 case TOOL_PMU__EVENT_CORE_WIDE: 482 *result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0; 483 return true; 484 485 case TOOL_PMU__EVENT_TARGET_CPU: 486 *result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0; 487 return true; 488 489 case TOOL_PMU__EVENT_NONE: 490 case TOOL_PMU__EVENT_DURATION_TIME: 491 case TOOL_PMU__EVENT_USER_TIME: 492 case TOOL_PMU__EVENT_SYSTEM_TIME: 493 case TOOL_PMU__EVENT_MAX: 494 default: 495 return false; 496 } 497 } 498 499 static void perf_counts__update(struct perf_counts_values *count, 500 const struct perf_counts_values *old_count, 501 bool raw, u64 val) 502 { 503 /* 504 * The values of enabled and running must make a ratio of 100%. The 505 * exact values don't matter as long as they are non-zero to avoid 506 * issues with evsel__count_has_error. 507 */ 508 if (old_count) { 509 count->val = raw ? val : old_count->val + val; 510 count->run = old_count->run + 1; 511 count->ena = old_count->ena + 1; 512 count->lost = old_count->lost; 513 } else { 514 count->val = val; 515 count->run++; 516 count->ena++; 517 count->lost = 0; 518 } 519 } 520 int evsel__tool_pmu_enable_cpu(struct evsel *evsel, int cpu_map_idx) 521 { 522 enum tool_pmu_event ev = evsel__tool_event(evsel); 523 int thread, nthreads; 524 525 if (!evsel->disabled) 526 return 0; 527 528 if (ev == TOOL_PMU__EVENT_DURATION_TIME) { 529 if (cpu_map_idx == 0) 530 evsel->duration_time.start_time = rdclock(); 531 return 0; 532 } 533 534 if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { 535 nthreads = xyarray__max_y(evsel->process_time.start_times); 536 for (thread = 0; thread < nthreads; thread++) { 537 __u64 *start_time = xyarray__entry(evsel->process_time.start_times, 538 cpu_map_idx, thread); 539 __u64 val; 540 int err; 541 542 err = tool_pmu__read_stat(evsel, cpu_map_idx, thread, &val); 543 if (!err) 544 *start_time = val; 545 else 546 *start_time = INVALID_START_TIME; 547 } 548 } 549 return 0; 550 } 551 552 int evsel__tool_pmu_enable(struct evsel *evsel) 553 { 554 unsigned int idx; 555 int err = 0; 556 557 if (!evsel->disabled) 558 return 0; 559 560 for (idx = 0; idx < perf_cpu_map__nr(evsel->core.cpus); idx++) { 561 err = evsel__tool_pmu_enable_cpu(evsel, idx); 562 if (err) 563 break; 564 } 565 return err; 566 } 567 568 int evsel__tool_pmu_disable_cpu(struct evsel *evsel, int cpu_map_idx) 569 { 570 enum tool_pmu_event ev = evsel__tool_event(evsel); 571 int thread, nthreads; 572 573 if (evsel->disabled) 574 return 0; 575 576 if (ev == TOOL_PMU__EVENT_DURATION_TIME) { 577 if (cpu_map_idx == 0) { 578 __u64 delta = rdclock() - evsel->duration_time.start_time; 579 580 evsel->duration_time.accumulated_time += delta; 581 } 582 return 0; 583 } 584 585 if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { 586 nthreads = xyarray__max_y(evsel->process_time.start_times); 587 for (thread = 0; thread < nthreads; thread++) { 588 __u64 *start_time = xyarray__entry(evsel->process_time.start_times, 589 cpu_map_idx, thread); 590 __u64 *accumulated_time = xyarray__entry( 591 evsel->process_time.accumulated_times, cpu_map_idx, thread); 592 __u64 val; 593 int err; 594 595 err = tool_pmu__read_stat(evsel, cpu_map_idx, thread, &val); 596 if (!err) { 597 if (*start_time != INVALID_START_TIME && val >= *start_time) 598 *accumulated_time += (val - *start_time); 599 } 600 *start_time = INVALID_START_TIME; 601 } 602 } 603 return 0; 604 } 605 606 int evsel__tool_pmu_disable(struct evsel *evsel) 607 { 608 unsigned int idx; 609 int err = 0; 610 611 if (evsel->disabled) 612 return 0; 613 614 for (idx = 0; idx < perf_cpu_map__nr(evsel->core.cpus); idx++) { 615 err = evsel__tool_pmu_disable_cpu(evsel, idx); 616 if (err) 617 break; 618 } 619 return err; 620 } 621 622 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) 623 { 624 __u64 delta_start = 0; 625 int err = 0; 626 struct perf_counts_values *count, *old_count = NULL; 627 bool adjust = false; 628 enum tool_pmu_event ev = evsel__tool_event(evsel); 629 630 count = perf_counts(evsel->counts, cpu_map_idx, thread); 631 if (evsel->prev_raw_counts) 632 old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); 633 634 switch (ev) { 635 case TOOL_PMU__EVENT_HAS_PMEM: 636 case TOOL_PMU__EVENT_NUM_CORES: 637 case TOOL_PMU__EVENT_NUM_CPUS: 638 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: 639 case TOOL_PMU__EVENT_NUM_DIES: 640 case TOOL_PMU__EVENT_NUM_PACKAGES: 641 case TOOL_PMU__EVENT_SLOTS: 642 case TOOL_PMU__EVENT_SMT_ON: 643 case TOOL_PMU__EVENT_CORE_WIDE: 644 case TOOL_PMU__EVENT_TARGET_CPU: 645 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: { 646 u64 val = 0; 647 648 if (cpu_map_idx == 0 && thread == 0) { 649 if (!tool_pmu__read_event(ev, evsel, 650 stat_config.system_wide, 651 stat_config.user_requested_cpu_list, 652 &val)) { 653 count->lost++; 654 val = 0; 655 } 656 } 657 perf_counts__update(count, old_count, /*raw=*/false, val); 658 return 0; 659 } 660 case TOOL_PMU__EVENT_DURATION_TIME: 661 if (cpu_map_idx == 0 && thread == 0) { 662 delta_start = evsel->duration_time.accumulated_time; 663 if (!evsel->disabled && 664 evsel->duration_time.start_time != INVALID_START_TIME) 665 delta_start += (rdclock() - evsel->duration_time.start_time); 666 } else { 667 delta_start = 0; 668 } 669 break; 670 case TOOL_PMU__EVENT_USER_TIME: 671 case TOOL_PMU__EVENT_SYSTEM_TIME: { 672 __u64 accumulated = *(__u64 *)xyarray__entry(evsel->process_time.accumulated_times, 673 cpu_map_idx, thread); 674 675 if (evsel->disabled) { 676 delta_start = accumulated; 677 } else { 678 __u64 *start_time = xyarray__entry(evsel->process_time.start_times, 679 cpu_map_idx, thread); 680 __u64 cur_time; 681 682 err = tool_pmu__read_stat(evsel, cpu_map_idx, thread, &cur_time); 683 if (!err) { 684 if (*start_time != INVALID_START_TIME && cur_time >= *start_time) 685 delta_start = accumulated + (cur_time - *start_time); 686 else 687 delta_start = accumulated; 688 } 689 } 690 adjust = true; 691 break; 692 } 693 case TOOL_PMU__EVENT_NONE: 694 case TOOL_PMU__EVENT_MAX: 695 default: 696 err = -EINVAL; 697 } 698 if (err) 699 return err; 700 701 if (adjust) { 702 __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); 703 704 delta_start *= 1e9 / ticks_per_sec; 705 } 706 perf_counts__update(count, old_count, /*raw=*/true, delta_start); 707 return 0; 708 } 709 710 struct perf_pmu *tool_pmu__new(void) 711 { 712 struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu)); 713 714 if (!tool) 715 return NULL; 716 717 if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) { 718 perf_pmu__delete(tool); 719 return NULL; 720 } 721 tool->events_table = find_core_events_table("common", "common"); 722 return tool; 723 } 724