// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
};

bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}
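
/*
 * Read the @field'th whitespace-separated numeric field from the "cpuN"
 * line of /proc/stat for @cpu. Per proc(5), field 1 is user time and
 * field 3 is system time, both measured in USER_HZ ticks.
 */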
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c') return -EINVAL;
	if (io__get_char(&io) != 'p') return -EINVAL;
	if (io__get_char(&io) != 'u') return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

/*
 * Read the @field'th field of /proc/pid/stat, skipping the non-numeric
 * comm and state fields. Fields are numbered as in proc(5), so field 14
 * is utime and field 15 is stime, both in USER_HZ ticks.
 */
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a -ve was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
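
/*
 * Set up any state a tool event needs at open time. duration_time just
 * records a start timestamp, while user_time and system_time open
 * /proc/<pid>/stat (or, system-wide, /proc/stat) and save the starting
 * tick counts in evsel->start_times. The other events need no state.
 */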
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}
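
/*
 * Compute the value of a constant-valued tool event into *result.
 * Returns true on success, false if the value cannot be determined or if
 * @ev is one of the time-based events, which must instead be read via
 * evsel__tool_pmu_read().
 */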
bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		*result = cpu__max_present_cpu().cpu;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (online) {
			*result = perf_cpu_map__nr(online);
			return true;
		}
		return false;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	u64 val;
	int fd, err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		if (evsel->prev_raw_counts)
			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
		val = 0;
		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, &val)) {
				count->lost++;
				val = 0;
			}
		}
		if (old_count) {
			count->val = old_count->val + val;
			count->run = old_count->run + 1;
			count->ena = old_count->ena + 1;
		} else {
			count->val = val;
			count->run++;
			count->ena++;
		}
		return 0;
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		fd = FD(evsel, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		/* Convert USER_HZ ticks to nanoseconds. */
		delta_start *= 1000000000 / ticks_per_sec;
	}
	count->val = delta_start;
	count->ena = count->run = delta_start;
	count->lost = 0;
	return 0;
}

struct perf_pmu *perf_pmus__tool_pmu(void)
{
	static struct perf_pmu tool = {
		.name = "tool",
		.type = PERF_PMU_TYPE_TOOL,
		.aliases = LIST_HEAD_INIT(tool.aliases),
		.caps = LIST_HEAD_INIT(tool.caps),
		.format = LIST_HEAD_INIT(tool.format),
	};

	if (!tool.events_table)
		tool.events_table = find_core_events_table("common", "common");

	return &tool;
}
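
/*
 * Usage sketch (not part of this file): tool events are selected like any
 * other PMU event, for example
 *
 *   $ perf stat -e duration_time,tool/num_cpus/ -- sleep 1
 *
 * and metric expressions can use them where the older expr literals such
 * as #smt_on and #num_cpus were used.
 */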