1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include <math.h> 5 #include <string.h> 6 #include "counts.h" 7 #include "cpumap.h" 8 #include "debug.h" 9 #include "header.h" 10 #include "stat.h" 11 #include "session.h" 12 #include "target.h" 13 #include "evlist.h" 14 #include "evsel.h" 15 #include "thread_map.h" 16 #include <linux/zalloc.h> 17 18 void update_stats(struct stats *stats, u64 val) 19 { 20 double delta; 21 22 stats->n++; 23 delta = val - stats->mean; 24 stats->mean += delta / stats->n; 25 stats->M2 += delta*(val - stats->mean); 26 27 if (val > stats->max) 28 stats->max = val; 29 30 if (val < stats->min) 31 stats->min = val; 32 } 33 34 double avg_stats(struct stats *stats) 35 { 36 return stats->mean; 37 } 38 39 /* 40 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 41 * 42 * (\Sum n_i^2) - ((\Sum n_i)^2)/n 43 * s^2 = ------------------------------- 44 * n - 1 45 * 46 * http://en.wikipedia.org/wiki/Stddev 47 * 48 * The std dev of the mean is related to the std dev by: 49 * 50 * s 51 * s_mean = ------- 52 * sqrt(n) 53 * 54 */ 55 double stddev_stats(struct stats *stats) 56 { 57 double variance, variance_mean; 58 59 if (stats->n < 2) 60 return 0.0; 61 62 variance = stats->M2 / (stats->n - 1); 63 variance_mean = variance / stats->n; 64 65 return sqrt(variance_mean); 66 } 67 68 double rel_stddev_stats(double stddev, double avg) 69 { 70 double pct = 0.0; 71 72 if (avg) 73 pct = 100.0 * stddev/avg; 74 75 return pct; 76 } 77 78 bool __perf_evsel_stat__is(struct evsel *evsel, 79 enum perf_stat_evsel_id id) 80 { 81 struct perf_stat_evsel *ps = evsel->stats; 82 83 return ps->id == id; 84 } 85 86 #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name 87 static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { 88 ID(NONE, x), 89 ID(CYCLES_IN_TX, cpu/cycles-t/), 90 ID(TRANSACTION_START, cpu/tx-start/), 91 ID(ELISION_START, cpu/el-start/), 92 ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), 93 ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), 94 ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), 95 ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), 96 ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), 97 ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), 98 ID(TOPDOWN_RETIRING, topdown-retiring), 99 ID(TOPDOWN_BAD_SPEC, topdown-bad-spec), 100 ID(TOPDOWN_FE_BOUND, topdown-fe-bound), 101 ID(TOPDOWN_BE_BOUND, topdown-be-bound), 102 ID(SMI_NUM, msr/smi/), 103 ID(APERF, msr/aperf/), 104 }; 105 #undef ID 106 107 static void perf_stat_evsel_id_init(struct evsel *evsel) 108 { 109 struct perf_stat_evsel *ps = evsel->stats; 110 int i; 111 112 /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ 113 114 for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { 115 if (!strcmp(evsel__name(evsel), id_str[i])) { 116 ps->id = i; 117 break; 118 } 119 } 120 } 121 122 static void evsel__reset_stat_priv(struct evsel *evsel) 123 { 124 int i; 125 struct perf_stat_evsel *ps = evsel->stats; 126 127 for (i = 0; i < 3; i++) 128 init_stats(&ps->res_stats[i]); 129 130 perf_stat_evsel_id_init(evsel); 131 } 132 133 static int evsel__alloc_stat_priv(struct evsel *evsel) 134 { 135 evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); 136 if (evsel->stats == NULL) 137 return -ENOMEM; 138 evsel__reset_stat_priv(evsel); 139 return 0; 140 } 141 142 static void evsel__free_stat_priv(struct evsel *evsel) 143 { 144 struct perf_stat_evsel *ps = evsel->stats; 145 146 if (ps) 147 zfree(&ps->group_data); 148 zfree(&evsel->stats); 149 } 150 151 static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) 152 { 153 struct perf_counts *counts; 154 155 counts = perf_counts__new(ncpus, nthreads); 156 if (counts) 157 evsel->prev_raw_counts = counts; 158 159 return counts ? 0 : -ENOMEM; 160 } 161 162 static void evsel__free_prev_raw_counts(struct evsel *evsel) 163 { 164 perf_counts__delete(evsel->prev_raw_counts); 165 evsel->prev_raw_counts = NULL; 166 } 167 168 static void evsel__reset_prev_raw_counts(struct evsel *evsel) 169 { 170 if (evsel->prev_raw_counts) 171 perf_counts__reset(evsel->prev_raw_counts); 172 } 173 174 static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) 175 { 176 int ncpus = evsel__nr_cpus(evsel); 177 int nthreads = perf_thread_map__nr(evsel->core.threads); 178 179 if (evsel__alloc_stat_priv(evsel) < 0 || 180 evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || 181 (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) 182 return -ENOMEM; 183 184 return 0; 185 } 186 187 int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) 188 { 189 struct evsel *evsel; 190 191 evlist__for_each_entry(evlist, evsel) { 192 if (evsel__alloc_stats(evsel, alloc_raw)) 193 goto out_free; 194 } 195 196 return 0; 197 198 out_free: 199 evlist__free_stats(evlist); 200 return -1; 201 } 202 203 void evlist__free_stats(struct evlist *evlist) 204 { 205 struct evsel *evsel; 206 207 evlist__for_each_entry(evlist, evsel) { 208 evsel__free_stat_priv(evsel); 209 evsel__free_counts(evsel); 210 evsel__free_prev_raw_counts(evsel); 211 } 212 } 213 214 void evlist__reset_stats(struct evlist *evlist) 215 { 216 struct evsel *evsel; 217 218 evlist__for_each_entry(evlist, evsel) { 219 evsel__reset_stat_priv(evsel); 220 evsel__reset_counts(evsel); 221 } 222 } 223 224 void evlist__reset_prev_raw_counts(struct evlist *evlist) 225 { 226 struct evsel *evsel; 227 228 evlist__for_each_entry(evlist, evsel) 229 evsel__reset_prev_raw_counts(evsel); 230 } 231 232 static void evsel__copy_prev_raw_counts(struct evsel *evsel) 233 { 234 int ncpus = evsel__nr_cpus(evsel); 235 int nthreads = perf_thread_map__nr(evsel->core.threads); 236 237 for (int thread = 0; thread < nthreads; thread++) { 238 for (int cpu = 0; cpu < ncpus; cpu++) { 239 *perf_counts(evsel->counts, cpu, thread) = 240 *perf_counts(evsel->prev_raw_counts, cpu, 241 thread); 242 } 243 } 244 245 evsel->counts->aggr = evsel->prev_raw_counts->aggr; 246 } 247 248 void evlist__copy_prev_raw_counts(struct evlist *evlist) 249 { 250 struct evsel *evsel; 251 252 evlist__for_each_entry(evlist, evsel) 253 evsel__copy_prev_raw_counts(evsel); 254 } 255 256 void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) 257 { 258 struct evsel *evsel; 259 260 /* 261 * To collect the overall statistics for interval mode, 262 * we copy the counts from evsel->prev_raw_counts to 263 * evsel->counts. The perf_stat_process_counter creates 264 * aggr values from per cpu values, but the per cpu values 265 * are 0 for AGGR_GLOBAL. So we use a trick that saves the 266 * previous aggr value to the first member of perf_counts, 267 * then aggr calculation in process_counter_values can work 268 * correctly. 269 */ 270 evlist__for_each_entry(evlist, evsel) { 271 *perf_counts(evsel->prev_raw_counts, 0, 0) = 272 evsel->prev_raw_counts->aggr; 273 } 274 } 275 276 static void zero_per_pkg(struct evsel *counter) 277 { 278 if (counter->per_pkg_mask) 279 memset(counter->per_pkg_mask, 0, cpu__max_cpu()); 280 } 281 282 static int check_per_pkg(struct evsel *counter, 283 struct perf_counts_values *vals, int cpu, bool *skip) 284 { 285 unsigned long *mask = counter->per_pkg_mask; 286 struct perf_cpu_map *cpus = evsel__cpus(counter); 287 int s; 288 289 *skip = false; 290 291 if (!counter->per_pkg) 292 return 0; 293 294 if (perf_cpu_map__empty(cpus)) 295 return 0; 296 297 if (!mask) { 298 mask = zalloc(cpu__max_cpu()); 299 if (!mask) 300 return -ENOMEM; 301 302 counter->per_pkg_mask = mask; 303 } 304 305 /* 306 * we do not consider an event that has not run as a good 307 * instance to mark a package as used (skip=1). Otherwise 308 * we may run into a situation where the first CPU in a package 309 * is not running anything, yet the second is, and this function 310 * would mark the package as used after the first CPU and would 311 * not read the values from the second CPU. 312 */ 313 if (!(vals->run && vals->ena)) 314 return 0; 315 316 s = cpu_map__get_socket(cpus, cpu, NULL).socket; 317 if (s < 0) 318 return -1; 319 320 *skip = test_and_set_bit(s, mask) == 1; 321 return 0; 322 } 323 324 static int 325 process_counter_values(struct perf_stat_config *config, struct evsel *evsel, 326 int cpu, int thread, 327 struct perf_counts_values *count) 328 { 329 struct perf_counts_values *aggr = &evsel->counts->aggr; 330 static struct perf_counts_values zero; 331 bool skip = false; 332 333 if (check_per_pkg(evsel, count, cpu, &skip)) { 334 pr_err("failed to read per-pkg counter\n"); 335 return -1; 336 } 337 338 if (skip) 339 count = &zero; 340 341 switch (config->aggr_mode) { 342 case AGGR_THREAD: 343 case AGGR_CORE: 344 case AGGR_DIE: 345 case AGGR_SOCKET: 346 case AGGR_NODE: 347 case AGGR_NONE: 348 if (!evsel->snapshot) 349 evsel__compute_deltas(evsel, cpu, thread, count); 350 perf_counts_values__scale(count, config->scale, NULL); 351 if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { 352 perf_stat__update_shadow_stats(evsel, count->val, 353 cpu, &rt_stat); 354 } 355 356 if (config->aggr_mode == AGGR_THREAD) { 357 if (config->stats) 358 perf_stat__update_shadow_stats(evsel, 359 count->val, 0, &config->stats[thread]); 360 else 361 perf_stat__update_shadow_stats(evsel, 362 count->val, 0, &rt_stat); 363 } 364 break; 365 case AGGR_GLOBAL: 366 aggr->val += count->val; 367 aggr->ena += count->ena; 368 aggr->run += count->run; 369 case AGGR_UNSET: 370 default: 371 break; 372 } 373 374 return 0; 375 } 376 377 static int process_counter_maps(struct perf_stat_config *config, 378 struct evsel *counter) 379 { 380 int nthreads = perf_thread_map__nr(counter->core.threads); 381 int ncpus = evsel__nr_cpus(counter); 382 int cpu, thread; 383 384 if (counter->core.system_wide) 385 nthreads = 1; 386 387 for (thread = 0; thread < nthreads; thread++) { 388 for (cpu = 0; cpu < ncpus; cpu++) { 389 if (process_counter_values(config, counter, cpu, thread, 390 perf_counts(counter->counts, cpu, thread))) 391 return -1; 392 } 393 } 394 395 return 0; 396 } 397 398 int perf_stat_process_counter(struct perf_stat_config *config, 399 struct evsel *counter) 400 { 401 struct perf_counts_values *aggr = &counter->counts->aggr; 402 struct perf_stat_evsel *ps = counter->stats; 403 u64 *count = counter->counts->aggr.values; 404 int i, ret; 405 406 aggr->val = aggr->ena = aggr->run = 0; 407 408 /* 409 * We calculate counter's data every interval, 410 * and the display code shows ps->res_stats 411 * avg value. We need to zero the stats for 412 * interval mode, otherwise overall avg running 413 * averages will be shown for each interval. 414 */ 415 if (config->interval || config->summary) { 416 for (i = 0; i < 3; i++) 417 init_stats(&ps->res_stats[i]); 418 } 419 420 if (counter->per_pkg) 421 zero_per_pkg(counter); 422 423 ret = process_counter_maps(config, counter); 424 if (ret) 425 return ret; 426 427 if (config->aggr_mode != AGGR_GLOBAL) 428 return 0; 429 430 if (!counter->snapshot) 431 evsel__compute_deltas(counter, -1, -1, aggr); 432 perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); 433 434 for (i = 0; i < 3; i++) 435 update_stats(&ps->res_stats[i], count[i]); 436 437 if (verbose > 0) { 438 fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 439 evsel__name(counter), count[0], count[1], count[2]); 440 } 441 442 /* 443 * Save the full runtime - to allow normalization during printout: 444 */ 445 perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); 446 447 return 0; 448 } 449 450 int perf_event__process_stat_event(struct perf_session *session, 451 union perf_event *event) 452 { 453 struct perf_counts_values count; 454 struct perf_record_stat *st = &event->stat; 455 struct evsel *counter; 456 457 count.val = st->val; 458 count.ena = st->ena; 459 count.run = st->run; 460 461 counter = evlist__id2evsel(session->evlist, st->id); 462 if (!counter) { 463 pr_err("Failed to resolve counter for stat event.\n"); 464 return -EINVAL; 465 } 466 467 *perf_counts(counter->counts, st->cpu, st->thread) = count; 468 counter->supported = true; 469 return 0; 470 } 471 472 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp) 473 { 474 struct perf_record_stat *st = (struct perf_record_stat *)event; 475 size_t ret; 476 477 ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n", 478 st->id, st->cpu, st->thread); 479 ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n", 480 st->val, st->ena, st->run); 481 482 return ret; 483 } 484 485 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) 486 { 487 struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event; 488 size_t ret; 489 490 ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time, 491 rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL"); 492 493 return ret; 494 } 495 496 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) 497 { 498 struct perf_stat_config sc; 499 size_t ret; 500 501 perf_event__read_stat_config(&sc, &event->stat_config); 502 503 ret = fprintf(fp, "\n"); 504 ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode); 505 ret += fprintf(fp, "... scale %d\n", sc.scale); 506 ret += fprintf(fp, "... interval %u\n", sc.interval); 507 508 return ret; 509 } 510 511 int create_perf_stat_counter(struct evsel *evsel, 512 struct perf_stat_config *config, 513 struct target *target, 514 int cpu) 515 { 516 struct perf_event_attr *attr = &evsel->core.attr; 517 struct evsel *leader = evsel->leader; 518 519 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 520 PERF_FORMAT_TOTAL_TIME_RUNNING; 521 522 /* 523 * The event is part of non trivial group, let's enable 524 * the group read (for leader) and ID retrieval for all 525 * members. 526 */ 527 if (leader->core.nr_members > 1) 528 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 529 530 attr->inherit = !config->no_inherit; 531 532 /* 533 * Some events get initialized with sample_(period/type) set, 534 * like tracepoints. Clear it up for counting. 535 */ 536 attr->sample_period = 0; 537 538 if (config->identifier) 539 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 540 541 if (config->all_user) { 542 attr->exclude_kernel = 1; 543 attr->exclude_user = 0; 544 } 545 546 if (config->all_kernel) { 547 attr->exclude_kernel = 0; 548 attr->exclude_user = 1; 549 } 550 551 /* 552 * Disabling all counters initially, they will be enabled 553 * either manually by us or by kernel via enable_on_exec 554 * set later. 555 */ 556 if (evsel__is_group_leader(evsel)) { 557 attr->disabled = 1; 558 559 /* 560 * In case of initial_delay we enable tracee 561 * events manually. 562 */ 563 if (target__none(target) && !config->initial_delay) 564 attr->enable_on_exec = 1; 565 } 566 567 if (target__has_cpu(target) && !target__has_per_thread(target)) 568 return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); 569 570 return evsel__open_per_thread(evsel, evsel->core.threads); 571 } 572