// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
	"core_wide",
	"target_cpu",
};

bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
{
	static struct perf_cpu_map *cpu0_map;
	enum tool_pmu_event event = (enum tool_pmu_event)attr->config;

	if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) {
		pr_err("Invalid tool PMU event config %llx\n", attr->config);
		return NULL;
	}
	if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
		return cpu_map__online();

	if (!cpu0_map)
		cpu0_map = perf_cpu_map__new_int(0);
	return perf_cpu_map__get(cpu0_map);
}

static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

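/*
 * Read the field'th numeric value (1-based, counted after the "cpuN" label)
 * from the /proc/stat line of the given CPU. For example, in a line such as
 *
 *   cpu0 4705 150 1120 3699176 780 0 5 0 0 0
 *
 * field 1 is user time and field 3 is system time, both in USER_HZ ticks.
 */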
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c') return -EINVAL;
	if (io__get_char(&io) != 'p') return -EINVAL;
	if (io__get_char(&io) != 'u') return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

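/*
 * Read the field'th value (1-based) from /proc/[pid]/stat. Fields 2 (comm)
 * and 3 (state) are strings and cannot be returned through the __u64 out
 * argument. Callers below use field 14 (utime) and field 15 (stime), both
 * counted in USER_HZ clock ticks.
 */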
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a -ve was read */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}

		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}

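/*
 * Compute the value of a tool event that is not read from a perf_event file
 * descriptor, e.g. topology counts and feature tests. Returns true and sets
 * *result on success, or false if the value cannot be determined.
 */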
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		*result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

static void perf_counts__update(struct perf_counts_values *count,
				const struct perf_counts_values *old_count,
				bool raw, u64 val)
{
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	if (old_count) {
		count->val = raw ? val : old_count->val + val;
		count->run = old_count->run + 1;
		count->ena = old_count->ena + 1;
		count->lost = old_count->lost;
	} else {
		count->val = val;
		count->run++;
		count->ena++;
		count->lost = 0;
	}
}

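/*
 * Read one counter value for a tool event. Constant events are computed via
 * tool_pmu__read_event() once, on the first CPU/thread index; duration_time
 * is derived from rdclock(); user_time and system_time are deltas of the
 * /proc stat fields, converted from clock ticks to nanoseconds.
 */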
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	int err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);
	if (evsel->prev_raw_counts)
		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_CORE_WIDE:
	case TOOL_PMU__EVENT_TARGET_CPU:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: {
		u64 val = 0;

		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel,
						  stat_config.system_wide,
						  stat_config.user_requested_cpu_list,
						  &val)) {
				count->lost++;
				val = 0;
			}
		}
		perf_counts__update(count, old_count, /*raw=*/false, val);
		return 0;
	}
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
		int fd = FD(evsel, cpu_map_idx, thread);

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1e9 / ticks_per_sec;
	}
	perf_counts__update(count, old_count, /*raw=*/true, delta_start);
	return 0;
}

struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}