// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
};

bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}
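/*
 * Helper to parse /proc/stat. Per proc(5), per-CPU lines there have the
 * form (values are cumulative USER_HZ ticks):
 *
 *	cpuN user nice system idle iowait irq softirq steal guest guest_nice
 *
 * so "field" counts from 1 at "user"; callers below pass 1 for user
 * time and 3 for system time. Line 0 of the file is the aggregate
 * "cpu" line, hence the extra line skipped before cpu.cpu.
 */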
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c') return -EINVAL;
	if (io__get_char(&io) != 'p') return -EINVAL;
	if (io__get_char(&io) != 'u') return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}
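/*
 * Helper to parse /proc/<pid>/stat. Per proc(5) the line begins
 * "pid (comm) state ..." followed by numeric fields; counting from 1,
 * field 14 is utime and field 15 is stime, both in clock ticks.
 * Callers below rely on exactly those two fields.
 */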
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a negative value was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}

bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present, fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus; if not present, fall back to the online CPU
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}
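/*
 * Usage sketch for tool_pmu__read_event() (hypothetical caller, not
 * part of this file). Events that don't depend on an evsel can be read
 * with a NULL evsel:
 *
 *	u64 cores;
 *
 *	if (tool_pmu__read_event(TOOL_PMU__EVENT_NUM_CORES, NULL, &cores))
 *		printf("num_cores=%llu\n", (unsigned long long)cores);
 *
 * The time-based events (duration_time, user_time, system_time) return
 * false here; they are only readable via evsel__tool_pmu_read() below.
 */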
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	u64 val;
	int fd, err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		if (evsel->prev_raw_counts)
			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
		val = 0;
		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel, &val)) {
				count->lost++;
				val = 0;
			}
		}
		if (old_count) {
			count->val = old_count->val + val;
			count->run = old_count->run + 1;
			count->ena = old_count->ena + 1;
		} else {
			count->val = val;
			count->run++;
			count->ena++;
		}
		return 0;
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		fd = FD(evsel, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1000000000 / ticks_per_sec;
	}
	count->val = delta_start;
	count->lost = 0;
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error().
	 */
	count->ena++;
	count->run++;
	return 0;
}
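/*
 * Worked example for the tick-to-nanosecond adjustment above: with the
 * common _SC_CLK_TCK value of 100, 1000000000 / 100 = 10000000ns (10ms)
 * per tick, so a delta of 250 ticks is reported as 2500000000ns (2.5s).
 */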
struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}
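/*
 * Usage sketch for tool_pmu__new() (hypothetical; real call sites live
 * outside this file): the caller owns the returned PMU and must free it
 * with perf_pmu__delete() if it is not kept, e.g.:
 *
 *	struct perf_pmu *tool = tool_pmu__new();
 *
 *	if (tool && !register_somewhere(tool))
 *		perf_pmu__delete(tool);
 *
 * where register_somewhere() stands in for whatever PMU list the caller
 * maintains.
 */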