// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
	"core_wide",
	"target_cpu",
};

bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c') return -EINVAL;
	if (io__get_char(&io) != 'p') return -EINVAL;
	if (io__get_char(&io) != 'u') return -EINVAL;
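
	/*
	 * Illustrative note: per proc(5), each "cpuN" line of /proc/stat
	 * is a series of space-separated tick counts. Counting fields
	 * from 1 after the "cpuN" token, field 1 is user time and field 3
	 * is system time, which is why callers pass "system ? 3 : 1".
	 * Example line (values made up):
	 *
	 *   cpu0 4705 356 584 3699176 23 4 0 0 0 0
	 */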
	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a negative sign was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
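
/*
 * Open any state a tool event needs. duration_time only records a start
 * timestamp. user_time and system_time open /proc/<pid>/stat (when
 * profiling specific processes; per proc(5), fields 14 and 15 are utime
 * and stime) or /proc/stat (system-wide) and record the starting tick
 * counts in evsel->start_times so that later reads can report deltas.
 * The remaining tool events are computed entirely at read time and need
 * no setup.
 */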
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}
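
/*
 * Compute the value of a tool event that is derived from the system
 * rather than from a hardware counter: topology counts (num_cores,
 * num_cpus, num_cpus_online, num_dies, num_packages), feature tests
 * (has_pmem, smt_on, core_wide, target_cpu) and architectural constants
 * (slots, system_tsc_freq). Returns true and sets *result on success;
 * returns false for the time events, which are handled in
 * evsel__tool_pmu_read(), and for values unavailable on this system.
 */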
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved
			 * in pmu_cpus; if not present, fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved
			 * in pmu_cpus; if not present, use the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		*result = (system_wide || user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}
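
/*
 * Read a tool event into its perf_counts slot. Topology and constant
 * events are evaluated once, on the first CPU and thread index, via
 * tool_pmu__read_event(). The time events report a delta since open:
 * duration_time uses rdclock() nanoseconds directly, while user_time and
 * system_time re-read /proc and convert clock ticks to nanoseconds using
 * sysconf(_SC_CLK_TCK); for example, at the common 100 ticks per second
 * each tick becomes 10,000,000ns.
 */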
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	u64 val;
	int fd, err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
	case TOOL_PMU__EVENT_CORE_WIDE:
	case TOOL_PMU__EVENT_TARGET_CPU:
		if (evsel->prev_raw_counts)
			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
		val = 0;
		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel,
						  stat_config.system_wide,
						  stat_config.user_requested_cpu_list,
						  &val)) {
				count->lost++;
				val = 0;
			}
		}
		if (old_count) {
			count->val = old_count->val + val;
			count->run = old_count->run + 1;
			count->ena = old_count->ena + 1;
		} else {
			count->val = val;
			count->run++;
			count->ena++;
		}
		return 0;
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread,
		 * or else aggregation will scale duration_time by the number
		 * of CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		fd = FD(evsel, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1000000000 / ticks_per_sec;
	}
	count->val = delta_start;
	count->lost = 0;
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	count->ena++;
	count->run++;
	return 0;
}

struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}
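
/*
 * Illustrative usage (not part of this file): tool PMU events are
 * requested like any other event, e.g.
 *
 *   $ perf stat -e duration_time,user_time,system_time -- sleep 1
 *
 * duration_time reports wall-clock nanoseconds, while user_time and
 * system_time report the CPU time accounted to the workload, derived
 * from /proc as implemented above.
 */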