// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
};

bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c') return -EINVAL;
	if (io__get_char(&io) != 'p') return -EINVAL;
	if (io__get_char(&io) != 'u') return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}
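
/*
 * For illustration, a per-CPU line of /proc/stat looks roughly like
 * (values made up):
 *
 *	cpu0 4705 356 584 3699176 23 0 45 0 0 0
 *
 * Once the "cpuN " prefix has been skipped, field 1 is user time and
 * field 3 is system time, both in USER_HZ ticks; those are the indices
 * the callers below pass for user_time and system_time.
 */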

static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a '-' was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
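
/*
 * Note: opening a tool event never calls perf_event_open(). num_cpus
 * needs no state, duration_time just records a start timestamp, and
 * user_time/system_time open /proc/<pid>/stat or /proc/stat per
 * CPU/thread so that later reads can compute deltas against the start
 * values captured here.
 */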

int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}
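
/*
 * A minimal usage sketch for tool_pmu__read_event() (hypothetical
 * caller, not part of this file):
 *
 *	u64 val;
 *
 *	if (tool_pmu__read_event(TOOL_PMU__EVENT_NUM_CPUS, NULL, &val))
 *		printf("num_cpus = %llu\n", (unsigned long long)val);
 *
 * A NULL evsel is valid for the CPU-counting events and falls back to
 * system-wide values, as the cases below show.
 */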

bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	u64 val;
	int fd, err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		if (evsel->prev_raw_counts)
			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
		val = 0;
		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel, &val)) {
				count->lost++;
				val = 0;
			}
		}
		if (old_count) {
			count->val = old_count->val + val;
			count->run = old_count->run + 1;
			count->ena = old_count->ena + 1;
		} else {
			count->val = val;
			count->run++;
			count->ena++;
		}
		return 0;
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		fd = FD(evsel, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1000000000 / ticks_per_sec;
	}
	count->val = delta_start;
	count->lost = 0;
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	count->ena++;
	count->run++;
	return 0;
}
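
/*
 * To make the tick conversion above concrete with made-up numbers: with
 * _SC_CLK_TCK at its common value of 100, a delta of 250 ticks becomes
 * 250 * (1000000000 / 100) = 2500000000ns, i.e. 2.5s of CPU time.
 */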

struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}
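
/*
 * Usage sketch (hypothetical caller; in perf the PMU scanning code is
 * what actually instantiates this PMU):
 *
 *	struct perf_pmu *pmu = tool_pmu__new();
 *
 *	if (pmu && perf_pmu__is_tool(pmu))
 *		assert(tool_pmu__str_to_event("duration_time") ==
 *		       TOOL_PMU__EVENT_DURATION_TIME);
 *
 * The "common" events table supplies descriptions for the tool events
 * where available.
 */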