1 // SPDX-License-Identifier: GPL-2.0-only 2 #include "cgroup.h" 3 #include "counts.h" 4 #include "cputopo.h" 5 #include "evsel.h" 6 #include "pmu.h" 7 #include "print-events.h" 8 #include "smt.h" 9 #include "time-utils.h" 10 #include "tool_pmu.h" 11 #include "tsc.h" 12 #include <api/fs/fs.h> 13 #include <api/io.h> 14 #include <internal/threadmap.h> 15 #include <perf/threadmap.h> 16 #include <fcntl.h> 17 #include <strings.h> 18 19 static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { 20 NULL, 21 "duration_time", 22 "user_time", 23 "system_time", 24 "has_pmem", 25 "num_cores", 26 "num_cpus", 27 "num_cpus_online", 28 "num_dies", 29 "num_packages", 30 "slots", 31 "smt_on", 32 "system_tsc_freq", 33 }; 34 35 bool tool_pmu__skip_event(const char *name __maybe_unused) 36 { 37 #if !defined(__aarch64__) 38 /* The slots event should only appear on arm64. */ 39 if (strcasecmp(name, "slots") == 0) 40 return true; 41 #endif 42 #if !defined(__i386__) && !defined(__x86_64__) 43 /* The system_tsc_freq event should only appear on x86. */ 44 if (strcasecmp(name, "system_tsc_freq") == 0) 45 return true; 46 #endif 47 return false; 48 } 49 50 int tool_pmu__num_skip_events(void) 51 { 52 int num = 0; 53 54 #if !defined(__aarch64__) 55 num++; 56 #endif 57 #if !defined(__i386__) && !defined(__x86_64__) 58 num++; 59 #endif 60 return num; 61 } 62 63 const char *tool_pmu__event_to_str(enum tool_pmu_event ev) 64 { 65 if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) && 66 !tool_pmu__skip_event(tool_pmu__event_names[ev])) 67 return tool_pmu__event_names[ev]; 68 69 return NULL; 70 } 71 72 enum tool_pmu_event tool_pmu__str_to_event(const char *str) 73 { 74 int i; 75 76 if (tool_pmu__skip_event(str)) 77 return TOOL_PMU__EVENT_NONE; 78 79 tool_pmu__for_each_event(i) { 80 if (!strcasecmp(str, tool_pmu__event_names[i])) 81 return i; 82 } 83 return TOOL_PMU__EVENT_NONE; 84 } 85 86 bool perf_pmu__is_tool(const struct perf_pmu *pmu) 87 { 88 return pmu && pmu->type == PERF_PMU_TYPE_TOOL; 89 } 90 91 bool evsel__is_tool(const struct evsel *evsel) 92 { 93 return perf_pmu__is_tool(evsel->pmu); 94 } 95 96 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) 97 { 98 if (!evsel__is_tool(evsel)) 99 return TOOL_PMU__EVENT_NONE; 100 101 return (enum tool_pmu_event)evsel->core.attr.config; 102 } 103 104 const char *evsel__tool_pmu_event_name(const struct evsel *evsel) 105 { 106 return tool_pmu__event_to_str(evsel->core.attr.config); 107 } 108 109 static bool read_until_char(struct io *io, char e) 110 { 111 int c; 112 113 do { 114 c = io__get_char(io); 115 if (c == -1) 116 return false; 117 } while (c != e); 118 return true; 119 } 120 121 static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) 122 { 123 char buf[256]; 124 struct io io; 125 int i; 126 127 io__init(&io, fd, buf, sizeof(buf)); 128 129 /* Skip lines to relevant CPU. */ 130 for (i = -1; i < cpu.cpu; i++) { 131 if (!read_until_char(&io, '\n')) 132 return -EINVAL; 133 } 134 /* Skip to "cpu". */ 135 if (io__get_char(&io) != 'c') return -EINVAL; 136 if (io__get_char(&io) != 'p') return -EINVAL; 137 if (io__get_char(&io) != 'u') return -EINVAL; 138 139 /* Skip N of cpuN. */ 140 if (!read_until_char(&io, ' ')) 141 return -EINVAL; 142 143 i = 1; 144 while (true) { 145 if (io__get_dec(&io, val) != ' ') 146 break; 147 if (field == i) 148 return 0; 149 i++; 150 } 151 return -EINVAL; 152 } 153 154 static int read_pid_stat_field(int fd, int field, __u64 *val) 155 { 156 char buf[256]; 157 struct io io; 158 int c, i; 159 160 io__init(&io, fd, buf, sizeof(buf)); 161 if (io__get_dec(&io, val) != ' ') 162 return -EINVAL; 163 if (field == 1) 164 return 0; 165 166 /* Skip comm. */ 167 if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) 168 return -EINVAL; 169 if (field == 2) 170 return -EINVAL; /* String can't be returned. */ 171 172 /* Skip state */ 173 if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) 174 return -EINVAL; 175 if (field == 3) 176 return -EINVAL; /* String can't be returned. */ 177 178 /* Loop over numeric fields*/ 179 if (io__get_char(&io) != ' ') 180 return -EINVAL; 181 182 i = 4; 183 while (true) { 184 c = io__get_dec(&io, val); 185 if (c == -1) 186 return -EINVAL; 187 if (c == -2) { 188 /* Assume a -ve was read */ 189 c = io__get_dec(&io, val); 190 *val *= -1; 191 } 192 if (c != ' ') 193 return -EINVAL; 194 if (field == i) 195 return 0; 196 i++; 197 } 198 return -EINVAL; 199 } 200 201 int evsel__tool_pmu_prepare_open(struct evsel *evsel, 202 struct perf_cpu_map *cpus, 203 int nthreads) 204 { 205 if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || 206 evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && 207 !evsel->start_times) { 208 evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), 209 nthreads, 210 sizeof(__u64)); 211 if (!evsel->start_times) 212 return -ENOMEM; 213 } 214 return 0; 215 } 216 217 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) 218 219 int evsel__tool_pmu_open(struct evsel *evsel, 220 struct perf_thread_map *threads, 221 int start_cpu_map_idx, int end_cpu_map_idx) 222 { 223 enum tool_pmu_event ev = evsel__tool_event(evsel); 224 int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; 225 226 if (ev == TOOL_PMU__EVENT_NUM_CPUS) 227 return 0; 228 229 if (ev == TOOL_PMU__EVENT_DURATION_TIME) { 230 if (evsel->core.attr.sample_period) /* no sampling */ 231 return -EINVAL; 232 evsel->start_time = rdclock(); 233 return 0; 234 } 235 236 if (evsel->cgrp) 237 pid = evsel->cgrp->fd; 238 239 nthreads = perf_thread_map__nr(threads); 240 for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { 241 for (thread = 0; thread < nthreads; thread++) { 242 if (thread >= nthreads) 243 break; 244 245 if (!evsel->cgrp && !evsel->core.system_wide) 246 pid = perf_thread_map__pid(threads, thread); 247 248 if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { 249 bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; 250 __u64 *start_time = NULL; 251 int fd; 252 253 if (evsel->core.attr.sample_period) { 254 /* no sampling */ 255 err = -EINVAL; 256 goto out_close; 257 } 258 if (pid > -1) { 259 char buf[64]; 260 261 snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); 262 fd = open(buf, O_RDONLY); 263 evsel->pid_stat = true; 264 } else { 265 fd = open("/proc/stat", O_RDONLY); 266 } 267 FD(evsel, idx, thread) = fd; 268 if (fd < 0) { 269 err = -errno; 270 goto out_close; 271 } 272 start_time = xyarray__entry(evsel->start_times, idx, thread); 273 if (pid > -1) { 274 err = read_pid_stat_field(fd, system ? 15 : 14, 275 start_time); 276 } else { 277 struct perf_cpu cpu; 278 279 cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); 280 err = read_stat_field(fd, cpu, system ? 3 : 1, 281 start_time); 282 } 283 if (err) 284 goto out_close; 285 } 286 287 } 288 } 289 return 0; 290 out_close: 291 if (err) 292 threads->err_thread = thread; 293 294 old_errno = errno; 295 do { 296 while (--thread >= 0) { 297 if (FD(evsel, idx, thread) >= 0) 298 close(FD(evsel, idx, thread)); 299 FD(evsel, idx, thread) = -1; 300 } 301 thread = nthreads; 302 } while (--idx >= 0); 303 errno = old_errno; 304 return err; 305 } 306 307 #if !defined(__i386__) && !defined(__x86_64__) 308 u64 arch_get_tsc_freq(void) 309 { 310 return 0; 311 } 312 #endif 313 314 #if !defined(__aarch64__) 315 u64 tool_pmu__cpu_slots_per_cycle(void) 316 { 317 return 0; 318 } 319 #endif 320 321 static bool has_pmem(void) 322 { 323 static bool has_pmem, cached; 324 const char *sysfs = sysfs__mountpoint(); 325 char path[PATH_MAX]; 326 327 if (!cached) { 328 snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); 329 has_pmem = access(path, F_OK) == 0; 330 cached = true; 331 } 332 return has_pmem; 333 } 334 335 bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) 336 { 337 const struct cpu_topology *topology; 338 339 switch (ev) { 340 case TOOL_PMU__EVENT_HAS_PMEM: 341 *result = has_pmem() ? 1 : 0; 342 return true; 343 344 case TOOL_PMU__EVENT_NUM_CORES: 345 topology = online_topology(); 346 *result = topology->core_cpus_lists; 347 return true; 348 349 case TOOL_PMU__EVENT_NUM_CPUS: 350 *result = cpu__max_present_cpu().cpu; 351 return true; 352 353 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { 354 struct perf_cpu_map *online = cpu_map__online(); 355 356 if (online) { 357 *result = perf_cpu_map__nr(online); 358 perf_cpu_map__put(online); 359 return true; 360 } 361 return false; 362 } 363 case TOOL_PMU__EVENT_NUM_DIES: 364 topology = online_topology(); 365 *result = topology->die_cpus_lists; 366 return true; 367 368 case TOOL_PMU__EVENT_NUM_PACKAGES: 369 topology = online_topology(); 370 *result = topology->package_cpus_lists; 371 return true; 372 373 case TOOL_PMU__EVENT_SLOTS: 374 *result = tool_pmu__cpu_slots_per_cycle(); 375 return *result ? true : false; 376 377 case TOOL_PMU__EVENT_SMT_ON: 378 *result = smt_on() ? 1 : 0; 379 return true; 380 381 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: 382 *result = arch_get_tsc_freq(); 383 return true; 384 385 case TOOL_PMU__EVENT_NONE: 386 case TOOL_PMU__EVENT_DURATION_TIME: 387 case TOOL_PMU__EVENT_USER_TIME: 388 case TOOL_PMU__EVENT_SYSTEM_TIME: 389 case TOOL_PMU__EVENT_MAX: 390 default: 391 return false; 392 } 393 } 394 395 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) 396 { 397 __u64 *start_time, cur_time, delta_start; 398 u64 val; 399 int fd, err = 0; 400 struct perf_counts_values *count, *old_count = NULL; 401 bool adjust = false; 402 enum tool_pmu_event ev = evsel__tool_event(evsel); 403 404 count = perf_counts(evsel->counts, cpu_map_idx, thread); 405 406 switch (ev) { 407 case TOOL_PMU__EVENT_HAS_PMEM: 408 case TOOL_PMU__EVENT_NUM_CORES: 409 case TOOL_PMU__EVENT_NUM_CPUS: 410 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: 411 case TOOL_PMU__EVENT_NUM_DIES: 412 case TOOL_PMU__EVENT_NUM_PACKAGES: 413 case TOOL_PMU__EVENT_SLOTS: 414 case TOOL_PMU__EVENT_SMT_ON: 415 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: 416 if (evsel->prev_raw_counts) 417 old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); 418 val = 0; 419 if (cpu_map_idx == 0 && thread == 0) { 420 if (!tool_pmu__read_event(ev, &val)) { 421 count->lost++; 422 val = 0; 423 } 424 } 425 if (old_count) { 426 count->val = old_count->val + val; 427 count->run = old_count->run + 1; 428 count->ena = old_count->ena + 1; 429 } else { 430 count->val = val; 431 count->run++; 432 count->ena++; 433 } 434 return 0; 435 case TOOL_PMU__EVENT_DURATION_TIME: 436 /* 437 * Pretend duration_time is only on the first CPU and thread, or 438 * else aggregation will scale duration_time by the number of 439 * CPUs/threads. 440 */ 441 start_time = &evsel->start_time; 442 if (cpu_map_idx == 0 && thread == 0) 443 cur_time = rdclock(); 444 else 445 cur_time = *start_time; 446 break; 447 case TOOL_PMU__EVENT_USER_TIME: 448 case TOOL_PMU__EVENT_SYSTEM_TIME: { 449 bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; 450 451 start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); 452 fd = FD(evsel, cpu_map_idx, thread); 453 lseek(fd, SEEK_SET, 0); 454 if (evsel->pid_stat) { 455 /* The event exists solely on 1 CPU. */ 456 if (cpu_map_idx == 0) 457 err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); 458 else 459 cur_time = 0; 460 } else { 461 /* The event is for all threads. */ 462 if (thread == 0) { 463 struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, 464 cpu_map_idx); 465 466 err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); 467 } else { 468 cur_time = 0; 469 } 470 } 471 adjust = true; 472 break; 473 } 474 case TOOL_PMU__EVENT_NONE: 475 case TOOL_PMU__EVENT_MAX: 476 default: 477 err = -EINVAL; 478 } 479 if (err) 480 return err; 481 482 delta_start = cur_time - *start_time; 483 if (adjust) { 484 __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); 485 486 delta_start *= 1000000000 / ticks_per_sec; 487 } 488 count->val = delta_start; 489 count->ena = count->run = delta_start; 490 count->lost = 0; 491 return 0; 492 } 493 494 struct perf_pmu *tool_pmu__new(void) 495 { 496 struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu)); 497 498 if (!tool) 499 goto out; 500 tool->name = strdup("tool"); 501 if (!tool->name) { 502 zfree(&tool); 503 goto out; 504 } 505 506 tool->type = PERF_PMU_TYPE_TOOL; 507 INIT_LIST_HEAD(&tool->aliases); 508 INIT_LIST_HEAD(&tool->caps); 509 INIT_LIST_HEAD(&tool->format); 510 tool->events_table = find_core_events_table("common", "common"); 511 512 out: 513 return tool; 514 } 515