xref: /linux/tools/perf/util/stat.c (revision 8520a98dbab61e9e340cdfb72dd17ccc8a98961e)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <math.h>
5 #include "counts.h"
6 #include "debug.h"
7 #include "stat.h"
8 #include "target.h"
9 #include "evlist.h"
10 #include "evsel.h"
11 #include "thread_map.h"
12 #include <linux/zalloc.h>
13 
14 void update_stats(struct stats *stats, u64 val)
15 {
16 	double delta;
17 
18 	stats->n++;
19 	delta = val - stats->mean;
20 	stats->mean += delta / stats->n;
21 	stats->M2 += delta*(val - stats->mean);
22 
23 	if (val > stats->max)
24 		stats->max = val;
25 
26 	if (val < stats->min)
27 		stats->min = val;
28 }
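/*
 * update_stats() is the textbook Welford online update of the running
 * mean and M2 (sum of squared deviations). Illustrative trace, assuming
 * a zero-initialized struct stats and the values 10, 12, 14:
 *
 *   struct stats s; init_stats(&s);
 *   update_stats(&s, 10);   n=1, delta=10, mean=10, M2=0
 *   update_stats(&s, 12);   n=2, delta=2,  mean=11, M2=2
 *   update_stats(&s, 14);   n=3, delta=3,  mean=12, M2=8
 *   avg_stats(&s) == 12.0
 */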
29 
30 double avg_stats(struct stats *stats)
31 {
32 	return stats->mean;
33 }
34 
35 /*
36  * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
37  *
38  *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
39  * s^2 = -------------------------------
40  *                  n - 1
41  *
42  * http://en.wikipedia.org/wiki/Stddev
43  *
44  * The std dev of the mean is related to the std dev by:
45  *
46  *             s
47  * s_mean = -------
48  *          sqrt(n)
49  *
50  */
51 double stddev_stats(struct stats *stats)
52 {
53 	double variance, variance_mean;
54 
55 	if (stats->n < 2)
56 		return 0.0;
57 
58 	variance = stats->M2 / (stats->n - 1);
59 	variance_mean = variance / stats->n;
60 
61 	return sqrt(variance_mean);
62 }
63 
64 double rel_stddev_stats(double stddev, double avg)
65 {
66 	double pct = 0.0;
67 
68 	if (avg)
69 		pct = 100.0 * stddev/avg;
70 
71 	return pct;
72 }
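/*
 * Continuing the illustrative trace above (mean=12, M2=8, n=3):
 * variance = M2 / (n - 1) = 4, so stddev_stats() returns
 * sqrt(4 / 3) ~= 1.15 (the std dev of the mean), and
 * rel_stddev_stats(1.15, 12.0) ~= 9.6%, which is the kind of
 * "+- x.xx%" noise figure printed for repeated runs.
 */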
73 
74 bool __perf_evsel_stat__is(struct evsel *evsel,
75 			   enum perf_stat_evsel_id id)
76 {
77 	struct perf_stat_evsel *ps = evsel->stats;
78 
79 	return ps->id == id;
80 }
81 
82 #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
83 static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
84 	ID(NONE,		x),
85 	ID(CYCLES_IN_TX,	cpu/cycles-t/),
86 	ID(TRANSACTION_START,	cpu/tx-start/),
87 	ID(ELISION_START,	cpu/el-start/),
88 	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
89 	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
90 	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
91 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
92 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
93 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
94 	ID(SMI_NUM, msr/smi/),
95 	ID(APERF, msr/aperf/),
96 };
97 #undef ID
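/*
 * For example, ID(SMI_NUM, msr/smi/) expands to
 * [PERF_STAT_EVSEL_ID__SMI_NUM] = "msr/smi/", so id_str[] maps each
 * enum value to the exact event name string matched by
 * perf_stat_evsel_id_init() below.
 */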
98 
99 static void perf_stat_evsel_id_init(struct evsel *evsel)
100 {
101 	struct perf_stat_evsel *ps = evsel->stats;
102 	int i;
103 
104 	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
105 
106 	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
107 		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
108 			ps->id = i;
109 			break;
110 		}
111 	}
112 }
113 
114 static void perf_evsel__reset_stat_priv(struct evsel *evsel)
115 {
116 	int i;
117 	struct perf_stat_evsel *ps = evsel->stats;
118 
119 	for (i = 0; i < 3; i++)
120 		init_stats(&ps->res_stats[i]);
121 
122 	perf_stat_evsel_id_init(evsel);
123 }
124 
125 static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
126 {
127 	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
128 	if (evsel->stats == NULL)
129 		return -ENOMEM;
130 	perf_evsel__reset_stat_priv(evsel);
131 	return 0;
132 }
133 
134 static void perf_evsel__free_stat_priv(struct evsel *evsel)
135 {
136 	struct perf_stat_evsel *ps = evsel->stats;
137 
138 	if (ps)
139 		zfree(&ps->group_data);
140 	zfree(&evsel->stats);
141 }
142 
143 static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
144 					     int ncpus, int nthreads)
145 {
146 	struct perf_counts *counts;
147 
148 	counts = perf_counts__new(ncpus, nthreads);
149 	if (counts)
150 		evsel->prev_raw_counts = counts;
151 
152 	return counts ? 0 : -ENOMEM;
153 }
154 
155 static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
156 {
157 	perf_counts__delete(evsel->prev_raw_counts);
158 	evsel->prev_raw_counts = NULL;
159 }
160 
161 static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
162 {
163 	int ncpus = perf_evsel__nr_cpus(evsel);
164 	int nthreads = perf_thread_map__nr(evsel->core.threads);
165 
166 	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
167 	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
168 	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
169 		return -ENOMEM;
170 
171 	return 0;
172 }
173 
174 int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
175 {
176 	struct evsel *evsel;
177 
178 	evlist__for_each_entry(evlist, evsel) {
179 		if (perf_evsel__alloc_stats(evsel, alloc_raw))
180 			goto out_free;
181 	}
182 
183 	return 0;
184 
185 out_free:
186 	perf_evlist__free_stats(evlist);
187 	return -1;
188 }
189 
190 void perf_evlist__free_stats(struct evlist *evlist)
191 {
192 	struct evsel *evsel;
193 
194 	evlist__for_each_entry(evlist, evsel) {
195 		perf_evsel__free_stat_priv(evsel);
196 		perf_evsel__free_counts(evsel);
197 		perf_evsel__free_prev_raw_counts(evsel);
198 	}
199 }
200 
201 void perf_evlist__reset_stats(struct evlist *evlist)
202 {
203 	struct evsel *evsel;
204 
205 	evlist__for_each_entry(evlist, evsel) {
206 		perf_evsel__reset_stat_priv(evsel);
207 		perf_evsel__reset_counts(evsel);
208 	}
209 }
210 
211 static void zero_per_pkg(struct evsel *counter)
212 {
213 	if (counter->per_pkg_mask)
214 		memset(counter->per_pkg_mask, 0, cpu__max_cpu());
215 }
216 
217 static int check_per_pkg(struct evsel *counter,
218 			 struct perf_counts_values *vals, int cpu, bool *skip)
219 {
220 	unsigned long *mask = counter->per_pkg_mask;
221 	struct perf_cpu_map *cpus = evsel__cpus(counter);
222 	int s;
223 
224 	*skip = false;
225 
226 	if (!counter->per_pkg)
227 		return 0;
228 
229 	if (perf_cpu_map__empty(cpus))
230 		return 0;
231 
232 	if (!mask) {
233 		mask = zalloc(cpu__max_cpu());
234 		if (!mask)
235 			return -ENOMEM;
236 
237 		counter->per_pkg_mask = mask;
238 	}
239 
240 	/*
241 	 * We do not consider an event that has not run as a good
242 	 * instance with which to mark a package as used (skip=1).
243 	 * Otherwise we could end up in a situation where the first CPU
244 	 * in a package is not running anything, yet the second is, and
245 	 * this function would mark the package as used after the first
246 	 * CPU and never read the values from the second CPU.
247 	 */
248 	if (!(vals->run && vals->ena))
249 		return 0;
250 
251 	s = cpu_map__get_socket(cpus, cpu, NULL);
252 	if (s < 0)
253 		return -1;
254 
255 	*skip = test_and_set_bit(s, mask) == 1;
256 	return 0;
257 }
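/*
 * Illustrative example: with CPUs 0-3 on socket 0 and CPUs 4-7 on
 * socket 1, a per-pkg event is kept only for the first CPU on each
 * socket that actually ran it; for the remaining CPUs *skip is set and
 * the caller substitutes a zeroed count, so each package is counted once.
 */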
258 
259 static int
260 process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
261 		       int cpu, int thread,
262 		       struct perf_counts_values *count)
263 {
264 	struct perf_counts_values *aggr = &evsel->counts->aggr;
265 	static struct perf_counts_values zero;
266 	bool skip = false;
267 
268 	if (check_per_pkg(evsel, count, cpu, &skip)) {
269 		pr_err("failed to read per-pkg counter\n");
270 		return -1;
271 	}
272 
273 	if (skip)
274 		count = &zero;
275 
276 	switch (config->aggr_mode) {
277 	case AGGR_THREAD:
278 	case AGGR_CORE:
279 	case AGGR_DIE:
280 	case AGGR_SOCKET:
281 	case AGGR_NONE:
282 		if (!evsel->snapshot)
283 			perf_evsel__compute_deltas(evsel, cpu, thread, count);
284 		perf_counts_values__scale(count, config->scale, NULL);
285 		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
286 			perf_stat__update_shadow_stats(evsel, count->val,
287 						       cpu, &rt_stat);
288 		}
289 
290 		if (config->aggr_mode == AGGR_THREAD) {
291 			if (config->stats)
292 				perf_stat__update_shadow_stats(evsel,
293 					count->val, 0, &config->stats[thread]);
294 			else
295 				perf_stat__update_shadow_stats(evsel,
296 					count->val, 0, &rt_stat);
297 		}
298 		break;
299 	case AGGR_GLOBAL:
300 		aggr->val += count->val;
301 		aggr->ena += count->ena;
302 		aggr->val += count->val;
		break;
303 	case AGGR_UNSET:
304 	default:
305 		break;
306 	}
307 
308 	return 0;
309 }
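/*
 * Note the split between aggregation modes above: for AGGR_NONE/THREAD/
 * CORE/DIE/SOCKET each per-cpu/thread count is delta'd and scaled in
 * place here (and, for AGGR_NONE and AGGR_THREAD, also fed to the shadow
 * stats), while for AGGR_GLOBAL the raw counts are only summed into
 * evsel->counts->aggr; the delta/scale step then happens once on the
 * aggregate in perf_stat_process_counter() below.
 */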
310 
311 static int process_counter_maps(struct perf_stat_config *config,
312 				struct evsel *counter)
313 {
314 	int nthreads = perf_thread_map__nr(counter->core.threads);
315 	int ncpus = perf_evsel__nr_cpus(counter);
316 	int cpu, thread;
317 
318 	if (counter->system_wide)
319 		nthreads = 1;
320 
321 	for (thread = 0; thread < nthreads; thread++) {
322 		for (cpu = 0; cpu < ncpus; cpu++) {
323 			if (process_counter_values(config, counter, cpu, thread,
324 						   perf_counts(counter->counts, cpu, thread)))
325 				return -1;
326 		}
327 	}
328 
329 	return 0;
330 }
331 
332 int perf_stat_process_counter(struct perf_stat_config *config,
333 			      struct evsel *counter)
334 {
335 	struct perf_counts_values *aggr = &counter->counts->aggr;
336 	struct perf_stat_evsel *ps = counter->stats;
337 	u64 *count = counter->counts->aggr.values;
338 	int i, ret;
339 
340 	aggr->val = aggr->ena = aggr->run = 0;
341 
342 	/*
343 	 * We calculate the counter's data every interval,
344 	 * and the display code shows the ps->res_stats
345 	 * average. We need to zero the stats for
346 	 * interval mode; otherwise the overall running
347 	 * average would be shown for each interval.
348 	 */
349 	if (config->interval)
350 		init_stats(ps->res_stats);
351 
352 	if (counter->per_pkg)
353 		zero_per_pkg(counter);
354 
355 	ret = process_counter_maps(config, counter);
356 	if (ret)
357 		return ret;
358 
359 	if (config->aggr_mode != AGGR_GLOBAL)
360 		return 0;
361 
362 	if (!counter->snapshot)
363 		perf_evsel__compute_deltas(counter, -1, -1, aggr);
364 	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
365 
366 	for (i = 0; i < 3; i++)
367 		update_stats(&ps->res_stats[i], count[i]);
368 
369 	if (verbose > 0) {
370 		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
371 			perf_evsel__name(counter), count[0], count[1], count[2]);
372 	}
373 
374 	/*
375 	 * Save the full runtime to allow normalization during printout:
376 	 */
377 	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
378 
379 	return 0;
380 }
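/*
 * struct perf_counts_values is a union, so aggr.values[0..2] overlay
 * {val, ena, run}; res_stats[0..2] therefore accumulate the aggregated
 * value, enabled time and running time across calls, and the display
 * code reports their running averages (e.g. across repeated runs).
 */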
381 
382 int perf_event__process_stat_event(struct perf_session *session,
383 				   union perf_event *event)
384 {
385 	struct perf_counts_values count;
386 	struct perf_record_stat *st = &event->stat;
387 	struct evsel *counter;
388 
389 	count.val = st->val;
390 	count.ena = st->ena;
391 	count.run = st->run;
392 
393 	counter = perf_evlist__id2evsel(session->evlist, st->id);
394 	if (!counter) {
395 		pr_err("Failed to resolve counter for stat event.\n");
396 		return -EINVAL;
397 	}
398 
399 	*perf_counts(counter->counts, st->cpu, st->thread) = count;
400 	counter->supported = true;
401 	return 0;
402 }
403 
404 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
405 {
406 	struct perf_record_stat *st = (struct perf_record_stat *)event;
407 	size_t ret;
408 
409 	ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
410 		       st->id, st->cpu, st->thread);
411 	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
412 		       st->val, st->ena, st->run);
413 
414 	return ret;
415 }
416 
417 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
418 {
419 	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
420 	size_t ret;
421 
422 	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
423 		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
424 
425 	return ret;
426 }
427 
428 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
429 {
430 	struct perf_stat_config sc;
431 	size_t ret;
432 
433 	perf_event__read_stat_config(&sc, &event->stat_config);
434 
435 	ret  = fprintf(fp, "\n");
436 	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
437 	ret += fprintf(fp, "... scale     %d\n", sc.scale);
438 	ret += fprintf(fp, "... interval  %u\n", sc.interval);
439 
440 	return ret;
441 }
442 
443 int create_perf_stat_counter(struct evsel *evsel,
444 			     struct perf_stat_config *config,
445 			     struct target *target)
446 {
447 	struct perf_event_attr *attr = &evsel->core.attr;
448 	struct evsel *leader = evsel->leader;
449 
450 	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
451 			    PERF_FORMAT_TOTAL_TIME_RUNNING;
452 
453 	/*
454 	 * The event is part of a non-trivial group, so enable
455 	 * group reads (for the leader) and ID retrieval for all
456 	 * members.
457 	 */
458 	if (leader->core.nr_members > 1)
459 		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
460 
461 	attr->inherit = !config->no_inherit;
462 
463 	/*
464 	 * Some events get initialized with sample_(period/type) set,
465 	 * like tracepoints. Clear them for counting.
466 	 */
467 	attr->sample_period = 0;
468 
469 	if (config->identifier)
470 		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
471 
472 	/*
473 	 * Disable all counters initially; they will be enabled
474 	 * either manually by us or by the kernel via enable_on_exec
475 	 * set later.
476 	 */
477 	if (perf_evsel__is_group_leader(evsel)) {
478 		attr->disabled = 1;
479 
480 		/*
481 		 * In case of initial_delay, we enable the tracee's
482 		 * events manually.
483 		 */
484 		if (target__none(target) && !config->initial_delay)
485 			attr->enable_on_exec = 1;
486 	}
487 
488 	if (target__has_cpu(target) && !target__has_per_thread(target))
489 		return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));
490 
491 	return perf_evsel__open_per_thread(evsel, evsel->core.threads);
492 }
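/*
 * With PERF_FORMAT_GROUP|PERF_FORMAT_ID added for non-trivial groups, a
 * single read() of the group leader returns the value and ID of every
 * member, alongside the TOTAL_TIME_ENABLED/RUNNING fields that
 * perf_counts_values__scale() later uses to compensate for event
 * multiplexing (see perf_event_open(2)).
 */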
493 
494 int perf_stat_synthesize_config(struct perf_stat_config *config,
495 				struct perf_tool *tool,
496 				struct evlist *evlist,
497 				perf_event__handler_t process,
498 				bool attrs)
499 {
500 	int err;
501 
502 	if (attrs) {
503 		err = perf_event__synthesize_attrs(tool, evlist, process);
504 		if (err < 0) {
505 			pr_err("Couldn't synthesize attrs.\n");
506 			return err;
507 		}
508 	}
509 
510 	err = perf_event__synthesize_extra_attr(tool, evlist, process,
511 						attrs);
	if (err < 0) {
		pr_err("Couldn't synthesize extra attrs.\n");
		return err;
	}
512 
513 	err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
514 						 process, NULL);
515 	if (err < 0) {
516 		pr_err("Couldn't synthesize thread map.\n");
517 		return err;
518 	}
519 
520 	err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
521 					     process, NULL);
522 	if (err < 0) {
523 		pr_err("Couldn't synthesize cpu map.\n");
524 		return err;
525 	}
526 
527 	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
528 	if (err < 0) {
529 		pr_err("Couldn't synthesize config.\n");
530 		return err;
531 	}
532 
533 	return 0;
534 }
535