1 // SPDX-License-Identifier: GPL-2.0 2 #include <errno.h> 3 #include <inttypes.h> 4 #include <math.h> 5 #include "counts.h" 6 #include "stat.h" 7 #include "target.h" 8 #include "evlist.h" 9 #include "evsel.h" 10 #include "thread_map.h" 11 #include <linux/zalloc.h> 12 13 void update_stats(struct stats *stats, u64 val) 14 { 15 double delta; 16 17 stats->n++; 18 delta = val - stats->mean; 19 stats->mean += delta / stats->n; 20 stats->M2 += delta*(val - stats->mean); 21 22 if (val > stats->max) 23 stats->max = val; 24 25 if (val < stats->min) 26 stats->min = val; 27 } 28 29 double avg_stats(struct stats *stats) 30 { 31 return stats->mean; 32 } 33 34 /* 35 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 36 * 37 * (\Sum n_i^2) - ((\Sum n_i)^2)/n 38 * s^2 = ------------------------------- 39 * n - 1 40 * 41 * http://en.wikipedia.org/wiki/Stddev 42 * 43 * The std dev of the mean is related to the std dev by: 44 * 45 * s 46 * s_mean = ------- 47 * sqrt(n) 48 * 49 */ 50 double stddev_stats(struct stats *stats) 51 { 52 double variance, variance_mean; 53 54 if (stats->n < 2) 55 return 0.0; 56 57 variance = stats->M2 / (stats->n - 1); 58 variance_mean = variance / stats->n; 59 60 return sqrt(variance_mean); 61 } 62 63 double rel_stddev_stats(double stddev, double avg) 64 { 65 double pct = 0.0; 66 67 if (avg) 68 pct = 100.0 * stddev/avg; 69 70 return pct; 71 } 72 73 bool __perf_evsel_stat__is(struct evsel *evsel, 74 enum perf_stat_evsel_id id) 75 { 76 struct perf_stat_evsel *ps = evsel->stats; 77 78 return ps->id == id; 79 } 80 81 #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name 82 static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { 83 ID(NONE, x), 84 ID(CYCLES_IN_TX, cpu/cycles-t/), 85 ID(TRANSACTION_START, cpu/tx-start/), 86 ID(ELISION_START, cpu/el-start/), 87 ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), 88 ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), 89 ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), 90 ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), 91 ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), 92 ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), 93 ID(SMI_NUM, msr/smi/), 94 ID(APERF, msr/aperf/), 95 }; 96 #undef ID 97 98 static void perf_stat_evsel_id_init(struct evsel *evsel) 99 { 100 struct perf_stat_evsel *ps = evsel->stats; 101 int i; 102 103 /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ 104 105 for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { 106 if (!strcmp(perf_evsel__name(evsel), id_str[i])) { 107 ps->id = i; 108 break; 109 } 110 } 111 } 112 113 static void perf_evsel__reset_stat_priv(struct evsel *evsel) 114 { 115 int i; 116 struct perf_stat_evsel *ps = evsel->stats; 117 118 for (i = 0; i < 3; i++) 119 init_stats(&ps->res_stats[i]); 120 121 perf_stat_evsel_id_init(evsel); 122 } 123 124 static int perf_evsel__alloc_stat_priv(struct evsel *evsel) 125 { 126 evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); 127 if (evsel->stats == NULL) 128 return -ENOMEM; 129 perf_evsel__reset_stat_priv(evsel); 130 return 0; 131 } 132 133 static void perf_evsel__free_stat_priv(struct evsel *evsel) 134 { 135 struct perf_stat_evsel *ps = evsel->stats; 136 137 if (ps) 138 zfree(&ps->group_data); 139 zfree(&evsel->stats); 140 } 141 142 static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel, 143 int ncpus, int nthreads) 144 { 145 struct perf_counts *counts; 146 147 counts = perf_counts__new(ncpus, nthreads); 148 if (counts) 149 evsel->prev_raw_counts = counts; 150 151 return counts ? 0 : -ENOMEM; 152 } 153 154 static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) 155 { 156 perf_counts__delete(evsel->prev_raw_counts); 157 evsel->prev_raw_counts = NULL; 158 } 159 160 static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) 161 { 162 int ncpus = perf_evsel__nr_cpus(evsel); 163 int nthreads = perf_thread_map__nr(evsel->core.threads); 164 165 if (perf_evsel__alloc_stat_priv(evsel) < 0 || 166 perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || 167 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) 168 return -ENOMEM; 169 170 return 0; 171 } 172 173 int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) 174 { 175 struct evsel *evsel; 176 177 evlist__for_each_entry(evlist, evsel) { 178 if (perf_evsel__alloc_stats(evsel, alloc_raw)) 179 goto out_free; 180 } 181 182 return 0; 183 184 out_free: 185 perf_evlist__free_stats(evlist); 186 return -1; 187 } 188 189 void perf_evlist__free_stats(struct evlist *evlist) 190 { 191 struct evsel *evsel; 192 193 evlist__for_each_entry(evlist, evsel) { 194 perf_evsel__free_stat_priv(evsel); 195 perf_evsel__free_counts(evsel); 196 perf_evsel__free_prev_raw_counts(evsel); 197 } 198 } 199 200 void perf_evlist__reset_stats(struct evlist *evlist) 201 { 202 struct evsel *evsel; 203 204 evlist__for_each_entry(evlist, evsel) { 205 perf_evsel__reset_stat_priv(evsel); 206 perf_evsel__reset_counts(evsel); 207 } 208 } 209 210 static void zero_per_pkg(struct evsel *counter) 211 { 212 if (counter->per_pkg_mask) 213 memset(counter->per_pkg_mask, 0, cpu__max_cpu()); 214 } 215 216 static int check_per_pkg(struct evsel *counter, 217 struct perf_counts_values *vals, int cpu, bool *skip) 218 { 219 unsigned long *mask = counter->per_pkg_mask; 220 struct perf_cpu_map *cpus = evsel__cpus(counter); 221 int s; 222 223 *skip = false; 224 225 if (!counter->per_pkg) 226 return 0; 227 228 if (perf_cpu_map__empty(cpus)) 229 return 0; 230 231 if (!mask) { 232 mask = zalloc(cpu__max_cpu()); 233 if (!mask) 234 return -ENOMEM; 235 236 counter->per_pkg_mask = mask; 237 } 238 239 /* 240 * we do not consider an event that has not run as a good 241 * instance to mark a package as used (skip=1). Otherwise 242 * we may run into a situation where the first CPU in a package 243 * is not running anything, yet the second is, and this function 244 * would mark the package as used after the first CPU and would 245 * not read the values from the second CPU. 246 */ 247 if (!(vals->run && vals->ena)) 248 return 0; 249 250 s = cpu_map__get_socket(cpus, cpu, NULL); 251 if (s < 0) 252 return -1; 253 254 *skip = test_and_set_bit(s, mask) == 1; 255 return 0; 256 } 257 258 static int 259 process_counter_values(struct perf_stat_config *config, struct evsel *evsel, 260 int cpu, int thread, 261 struct perf_counts_values *count) 262 { 263 struct perf_counts_values *aggr = &evsel->counts->aggr; 264 static struct perf_counts_values zero; 265 bool skip = false; 266 267 if (check_per_pkg(evsel, count, cpu, &skip)) { 268 pr_err("failed to read per-pkg counter\n"); 269 return -1; 270 } 271 272 if (skip) 273 count = &zero; 274 275 switch (config->aggr_mode) { 276 case AGGR_THREAD: 277 case AGGR_CORE: 278 case AGGR_DIE: 279 case AGGR_SOCKET: 280 case AGGR_NONE: 281 if (!evsel->snapshot) 282 perf_evsel__compute_deltas(evsel, cpu, thread, count); 283 perf_counts_values__scale(count, config->scale, NULL); 284 if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { 285 perf_stat__update_shadow_stats(evsel, count->val, 286 cpu, &rt_stat); 287 } 288 289 if (config->aggr_mode == AGGR_THREAD) { 290 if (config->stats) 291 perf_stat__update_shadow_stats(evsel, 292 count->val, 0, &config->stats[thread]); 293 else 294 perf_stat__update_shadow_stats(evsel, 295 count->val, 0, &rt_stat); 296 } 297 break; 298 case AGGR_GLOBAL: 299 aggr->val += count->val; 300 aggr->ena += count->ena; 301 aggr->run += count->run; 302 case AGGR_UNSET: 303 default: 304 break; 305 } 306 307 return 0; 308 } 309 310 static int process_counter_maps(struct perf_stat_config *config, 311 struct evsel *counter) 312 { 313 int nthreads = perf_thread_map__nr(counter->core.threads); 314 int ncpus = perf_evsel__nr_cpus(counter); 315 int cpu, thread; 316 317 if (counter->system_wide) 318 nthreads = 1; 319 320 for (thread = 0; thread < nthreads; thread++) { 321 for (cpu = 0; cpu < ncpus; cpu++) { 322 if (process_counter_values(config, counter, cpu, thread, 323 perf_counts(counter->counts, cpu, thread))) 324 return -1; 325 } 326 } 327 328 return 0; 329 } 330 331 int perf_stat_process_counter(struct perf_stat_config *config, 332 struct evsel *counter) 333 { 334 struct perf_counts_values *aggr = &counter->counts->aggr; 335 struct perf_stat_evsel *ps = counter->stats; 336 u64 *count = counter->counts->aggr.values; 337 int i, ret; 338 339 aggr->val = aggr->ena = aggr->run = 0; 340 341 /* 342 * We calculate counter's data every interval, 343 * and the display code shows ps->res_stats 344 * avg value. We need to zero the stats for 345 * interval mode, otherwise overall avg running 346 * averages will be shown for each interval. 347 */ 348 if (config->interval) 349 init_stats(ps->res_stats); 350 351 if (counter->per_pkg) 352 zero_per_pkg(counter); 353 354 ret = process_counter_maps(config, counter); 355 if (ret) 356 return ret; 357 358 if (config->aggr_mode != AGGR_GLOBAL) 359 return 0; 360 361 if (!counter->snapshot) 362 perf_evsel__compute_deltas(counter, -1, -1, aggr); 363 perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); 364 365 for (i = 0; i < 3; i++) 366 update_stats(&ps->res_stats[i], count[i]); 367 368 if (verbose > 0) { 369 fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 370 perf_evsel__name(counter), count[0], count[1], count[2]); 371 } 372 373 /* 374 * Save the full runtime - to allow normalization during printout: 375 */ 376 perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); 377 378 return 0; 379 } 380 381 int perf_event__process_stat_event(struct perf_session *session, 382 union perf_event *event) 383 { 384 struct perf_counts_values count; 385 struct perf_record_stat *st = &event->stat; 386 struct evsel *counter; 387 388 count.val = st->val; 389 count.ena = st->ena; 390 count.run = st->run; 391 392 counter = perf_evlist__id2evsel(session->evlist, st->id); 393 if (!counter) { 394 pr_err("Failed to resolve counter for stat event.\n"); 395 return -EINVAL; 396 } 397 398 *perf_counts(counter->counts, st->cpu, st->thread) = count; 399 counter->supported = true; 400 return 0; 401 } 402 403 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp) 404 { 405 struct perf_record_stat *st = (struct perf_record_stat *)event; 406 size_t ret; 407 408 ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n", 409 st->id, st->cpu, st->thread); 410 ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n", 411 st->val, st->ena, st->run); 412 413 return ret; 414 } 415 416 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) 417 { 418 struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event; 419 size_t ret; 420 421 ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time, 422 rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL"); 423 424 return ret; 425 } 426 427 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) 428 { 429 struct perf_stat_config sc; 430 size_t ret; 431 432 perf_event__read_stat_config(&sc, &event->stat_config); 433 434 ret = fprintf(fp, "\n"); 435 ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode); 436 ret += fprintf(fp, "... scale %d\n", sc.scale); 437 ret += fprintf(fp, "... interval %u\n", sc.interval); 438 439 return ret; 440 } 441 442 int create_perf_stat_counter(struct evsel *evsel, 443 struct perf_stat_config *config, 444 struct target *target) 445 { 446 struct perf_event_attr *attr = &evsel->core.attr; 447 struct evsel *leader = evsel->leader; 448 449 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 450 PERF_FORMAT_TOTAL_TIME_RUNNING; 451 452 /* 453 * The event is part of non trivial group, let's enable 454 * the group read (for leader) and ID retrieval for all 455 * members. 456 */ 457 if (leader->core.nr_members > 1) 458 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 459 460 attr->inherit = !config->no_inherit; 461 462 /* 463 * Some events get initialized with sample_(period/type) set, 464 * like tracepoints. Clear it up for counting. 465 */ 466 attr->sample_period = 0; 467 468 if (config->identifier) 469 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 470 471 /* 472 * Disabling all counters initially, they will be enabled 473 * either manually by us or by kernel via enable_on_exec 474 * set later. 475 */ 476 if (perf_evsel__is_group_leader(evsel)) { 477 attr->disabled = 1; 478 479 /* 480 * In case of initial_delay we enable tracee 481 * events manually. 482 */ 483 if (target__none(target) && !config->initial_delay) 484 attr->enable_on_exec = 1; 485 } 486 487 if (target__has_cpu(target) && !target__has_per_thread(target)) 488 return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel)); 489 490 return perf_evsel__open_per_thread(evsel, evsel->core.threads); 491 } 492 493 int perf_stat_synthesize_config(struct perf_stat_config *config, 494 struct perf_tool *tool, 495 struct evlist *evlist, 496 perf_event__handler_t process, 497 bool attrs) 498 { 499 int err; 500 501 if (attrs) { 502 err = perf_event__synthesize_attrs(tool, evlist, process); 503 if (err < 0) { 504 pr_err("Couldn't synthesize attrs.\n"); 505 return err; 506 } 507 } 508 509 err = perf_event__synthesize_extra_attr(tool, evlist, process, 510 attrs); 511 512 err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, 513 process, NULL); 514 if (err < 0) { 515 pr_err("Couldn't synthesize thread map.\n"); 516 return err; 517 } 518 519 err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, 520 process, NULL); 521 if (err < 0) { 522 pr_err("Couldn't synthesize thread map.\n"); 523 return err; 524 } 525 526 err = perf_event__synthesize_stat_config(tool, config, process, NULL); 527 if (err < 0) { 528 pr_err("Couldn't synthesize config.\n"); 529 return err; 530 } 531 532 return 0; 533 } 534