// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include "counts.h"
#include "debug.h"
#include "stat.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta*(val - stats->mean);

	if (val > stats->max)
		stats->max = val;

	if (val < stats->min)
		stats->min = val;
}

double avg_stats(struct stats *stats)
{
	return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
	double variance, variance_mean;

	if (stats->n < 2)
		return 0.0;

	variance = stats->M2 / (stats->n - 1);
	variance_mean = variance / stats->n;

	return sqrt(variance_mean);
}

double rel_stddev_stats(double stddev, double avg)
{
	double pct = 0.0;

	if (avg)
		pct = 100.0 * stddev/avg;

	return pct;
}
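/*
 * Minimal usage sketch for the helpers above (illustration only, not part
 * of any call path in this file; the sample values are made up).  Each
 * measured value is fed to update_stats(), which maintains the mean and M2
 * online (Welford's algorithm), and avg_stats()/stddev_stats() then give
 * "mean +- std dev of the mean", assuming init_stats() from stat.h:
 *
 *	struct stats st;
 *	u64 vals[] = { 100, 104, 98, 101 };
 *	int i;
 *
 *	init_stats(&st);
 *	for (i = 0; i < 4; i++)
 *		update_stats(&st, vals[i]);
 *
 *	printf("%.2f +- %.2f (%.2f%%)\n", avg_stats(&st), stddev_stats(&st),
 *	       rel_stddev_stats(stddev_stats(&st), avg_stats(&st)));
 */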
bool __perf_evsel_stat__is(struct evsel *evsel,
			   enum perf_stat_evsel_id id)
{
	struct perf_stat_evsel *ps = evsel->stats;

	return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
	ID(NONE, x),
	ID(CYCLES_IN_TX, cpu/cycles-t/),
	ID(TRANSACTION_START, cpu/tx-start/),
	ID(ELISION_START, cpu/el-start/),
	ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
	ID(SMI_NUM, msr/smi/),
	ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;
	int i;

	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
			ps->id = i;
			break;
		}
	}
}

static void perf_evsel__reset_stat_priv(struct evsel *evsel)
{
	int i;
	struct perf_stat_evsel *ps = evsel->stats;

	for (i = 0; i < 3; i++)
		init_stats(&ps->res_stats[i]);

	perf_stat_evsel_id_init(evsel);
}

static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
{
	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
	if (evsel->stats == NULL)
		return -ENOMEM;
	perf_evsel__reset_stat_priv(evsel);
	return 0;
}

static void perf_evsel__free_stat_priv(struct evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;

	if (ps)
		zfree(&ps->group_data);
	zfree(&evsel->stats);
}

static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
					     int ncpus, int nthreads)
{
	struct perf_counts *counts;

	counts = perf_counts__new(ncpus, nthreads);
	if (counts)
		evsel->prev_raw_counts = counts;

	return counts ? 0 : -ENOMEM;
}

static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
{
	perf_counts__delete(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
}

static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
	int ncpus = perf_evsel__nr_cpus(evsel);
	int nthreads = perf_thread_map__nr(evsel->core.threads);

	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
		return -ENOMEM;

	return 0;
}

int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (perf_evsel__alloc_stats(evsel, alloc_raw))
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}

void perf_evlist__free_stats(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__free_stat_priv(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_prev_raw_counts(evsel);
	}
}

void perf_evlist__reset_stats(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel);
	}
}

static void zero_per_pkg(struct evsel *counter)
{
	if (counter->per_pkg_mask)
		memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}

static int check_per_pkg(struct evsel *counter,
			 struct perf_counts_values *vals, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct perf_cpu_map *cpus = evsel__cpus(counter);
	int s;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	if (perf_cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		mask = zalloc(cpu__max_cpu());
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	/*
	 * we do not consider an event that has not run as a good
	 * instance to mark a package as used (skip=1). Otherwise
	 * we may run into a situation where the first CPU in a package
	 * is not running anything, yet the second is, and this function
	 * would mark the package as used after the first CPU and would
	 * not read the values from the second CPU.
	 */
	if (!(vals->run && vals->ena))
		return 0;

	s = cpu_map__get_socket(cpus, cpu, NULL);
	if (s < 0)
		return -1;

	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}
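/*
 * Behaviour sketch for check_per_pkg() (illustration only, hypothetical
 * topology and variable names): for a per_pkg event, only the first CPU of
 * a package whose counter actually ran contributes a value; later CPUs of
 * the same package are flagged for skipping.  Assuming CPUs 0 and 1 both
 * sit in socket 0, both ran the event, and vals_cpu0/vals_cpu1 hold their
 * read values:
 *
 *	bool skip;
 *
 *	check_per_pkg(counter, vals_cpu0, 0, &skip);	-> skip == false
 *	check_per_pkg(counter, vals_cpu1, 1, &skip);	-> skip == true
 */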
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
		       int cpu, int thread,
		       struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, count, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	if (skip)
		count = &zero;

	switch (config->aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
	case AGGR_DIE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, thread, count);
		perf_counts_values__scale(count, config->scale, NULL);
		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
			perf_stat__update_shadow_stats(evsel, count->val,
						       cpu, &rt_stat);
		}

		if (config->aggr_mode == AGGR_THREAD) {
			if (config->stats)
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &config->stats[thread]);
			else
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &rt_stat);
		}
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		aggr->ena += count->ena;
		aggr->run += count->run;
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
				struct evsel *counter)
{
	int nthreads = perf_thread_map__nr(counter->core.threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			if (process_counter_values(config, counter, cpu, thread,
						   perf_counts(counter->counts, cpu, thread)))
				return -1;
		}
	}

	return 0;
}
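/*
 * Scaling sketch (illustration only): perf_counts_values__scale(), from
 * libperf, compensates for counter multiplexing before a value is
 * accumulated or printed.  Roughly:
 *
 *	if (run == 0)
 *		val = 0;
 *	else if (run < ena)
 *		val = val * ena / run;
 *
 * i.e. a counter that was scheduled for only part of its enabled time is
 * extrapolated to the full enabled window.  For AGGR_GLOBAL the per-cpu
 * values are first summed into counts->aggr above and the delta/scale step
 * runs once on that aggregate in perf_stat_process_counter() below.
 */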
int perf_stat_process_counter(struct perf_stat_config *config,
			      struct evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat_evsel *ps = counter->stats;
	u64 *count = counter->counts->aggr.values;
	int i, ret;

	aggr->val = aggr->ena = aggr->run = 0;

	/*
	 * We calculate counter's data every interval,
	 * and the display code shows ps->res_stats
	 * avg value. We need to zero the stats for
	 * interval mode, otherwise the running average
	 * across all intervals would be shown for each
	 * interval.
	 */
	if (config->interval)
		init_stats(ps->res_stats);

	if (counter->per_pkg)
		zero_per_pkg(counter);

	ret = process_counter_maps(config, counter);
	if (ret)
		return ret;

	if (config->aggr_mode != AGGR_GLOBAL)
		return 0;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, -1, aggr);
	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose > 0) {
		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

	return 0;
}

int perf_event__process_stat_event(struct perf_session *session,
				   union perf_event *event)
{
	struct perf_counts_values count;
	struct perf_record_stat *st = &event->stat;
	struct evsel *counter;

	count.val = st->val;
	count.ena = st->ena;
	count.run = st->run;

	counter = perf_evlist__id2evsel(session->evlist, st->id);
	if (!counter) {
		pr_err("Failed to resolve counter for stat event.\n");
		return -EINVAL;
	}

	*perf_counts(counter->counts, st->cpu, st->thread) = count;
	counter->supported = true;
	return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
	struct perf_record_stat *st = (struct perf_record_stat *)event;
	size_t ret;

	ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
		      st->id, st->cpu, st->thread);
	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
		       st->val, st->ena, st->run);

	return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
	size_t ret;

	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

	return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
	struct perf_stat_config sc;
	size_t ret;

	perf_event__read_stat_config(&sc, &event->stat_config);

	ret = fprintf(fp, "\n");
	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
	ret += fprintf(fp, "... scale %d\n", sc.scale);
	ret += fprintf(fp, "... interval %u\n", sc.interval);

	return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel->leader;

	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING;

	/*
	 * The event is part of a non-trivial group, so enable the group
	 * read (for the leader) and ID retrieval for all members.
	 */
	if (leader->core.nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !config->no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	if (config->identifier)
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disable all counters initially; they will be enabled either
	 * manually by us or by the kernel via enable_on_exec, set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(target) && !config->initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(target) && !target__has_per_thread(target))
		return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}
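/*
 * Read-format sketch (illustration only): with the flags set up above, a
 * read() on a group leader returns, per perf_event_open(2), roughly
 *
 *	struct read_format {
 *		u64 nr;			  number of group members
 *		u64 time_enabled;	  PERF_FORMAT_TOTAL_TIME_ENABLED
 *		u64 time_running;	  PERF_FORMAT_TOTAL_TIME_RUNNING
 *		struct { u64 value, id; } values[nr];	  GROUP + ID
 *	};
 *
 * which is why a non-trivial group only needs PERF_FORMAT_ID on top of
 * PERF_FORMAT_GROUP to match each value back to its evsel.
 */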
int perf_stat_synthesize_config(struct perf_stat_config *config,
				struct perf_tool *tool,
				struct evlist *evlist,
				perf_event__handler_t process,
				bool attrs)
{
	int err;

	if (attrs) {
		err = perf_event__synthesize_attrs(tool, evlist, process);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	err = perf_event__synthesize_extra_attr(tool, evlist, process,
						attrs);
	if (err < 0) {
		pr_err("Couldn't synthesize extra attrs.\n");
		return err;
	}

	err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
						 process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
					     process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}
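/*
 * Stream sketch (illustration only): for a `perf stat record`-style
 * session, the control events synthesized above land in the data stream
 * roughly in the order
 *
 *	attrs -> extra attrs -> thread map -> cpu map -> stat config
 *
 * followed later by the PERF_RECORD_STAT and PERF_RECORD_STAT_ROUND
 * records that the handlers earlier in this file consume and print.
 */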