xref: /linux/tools/perf/util/stat.c (revision c1a604dff486399ae0be95e6396e0158df95ad5d)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <math.h>
5 #include "counts.h"
6 #include "stat.h"
7 #include "target.h"
8 #include "evlist.h"
9 #include "evsel.h"
10 #include "thread_map.h"
11 #include <linux/zalloc.h>
12 
13 void update_stats(struct stats *stats, u64 val)
14 {
15 	double delta;
16 
17 	stats->n++;
18 	delta = val - stats->mean;
19 	stats->mean += delta / stats->n;
20 	stats->M2 += delta*(val - stats->mean);
21 
22 	if (val > stats->max)
23 		stats->max = val;
24 
25 	if (val < stats->min)
26 		stats->min = val;
27 }
28 
/* Return the running arithmetic mean maintained by update_stats(). */
double avg_stats(struct stats *stats)
{
	return stats->mean;
}
33 
34 /*
35  * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
36  *
37  *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
38  * s^2 = -------------------------------
39  *                  n - 1
40  *
41  * http://en.wikipedia.org/wiki/Stddev
42  *
43  * The std dev of the mean is related to the std dev by:
44  *
45  *             s
46  * s_mean = -------
47  *          sqrt(n)
48  *
49  */
50 double stddev_stats(struct stats *stats)
51 {
52 	double variance, variance_mean;
53 
54 	if (stats->n < 2)
55 		return 0.0;
56 
57 	variance = stats->M2 / (stats->n - 1);
58 	variance_mean = variance / stats->n;
59 
60 	return sqrt(variance_mean);
61 }
62 
/*
 * Express @stddev as a percentage of @avg; returns 0.0 when @avg is zero
 * to avoid dividing by zero.
 */
double rel_stddev_stats(double stddev, double avg)
{
	if (!avg)
		return 0.0;

	return 100.0 * stddev / avg;
}
72 
73 bool __perf_evsel_stat__is(struct evsel *evsel,
74 			   enum perf_stat_evsel_id id)
75 {
76 	struct perf_stat_evsel *ps = evsel->stats;
77 
78 	return ps->id == id;
79 }
80 
/*
 * Table mapping each perf_stat_evsel_id to the exact event-name string it
 * is recognized by; perf_stat_evsel_id_init() strcmp()s the evsel name
 * against these entries.  The ID() macro stringizes the name unquoted.
 */
#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
	ID(NONE,		x),
	ID(CYCLES_IN_TX,	cpu/cycles-t/),
	ID(TRANSACTION_START,	cpu/tx-start/),
	ID(ELISION_START,	cpu/el-start/),
	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
	ID(SMI_NUM, msr/smi/),
	ID(APERF, msr/aperf/),
};
#undef ID
97 
98 static void perf_stat_evsel_id_init(struct evsel *evsel)
99 {
100 	struct perf_stat_evsel *ps = evsel->stats;
101 	int i;
102 
103 	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
104 
105 	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
106 		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
107 			ps->id = i;
108 			break;
109 		}
110 	}
111 }
112 
113 static void perf_evsel__reset_stat_priv(struct evsel *evsel)
114 {
115 	int i;
116 	struct perf_stat_evsel *ps = evsel->stats;
117 
118 	for (i = 0; i < 3; i++)
119 		init_stats(&ps->res_stats[i]);
120 
121 	perf_stat_evsel_id_init(evsel);
122 }
123 
124 static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
125 {
126 	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
127 	if (evsel->stats == NULL)
128 		return -ENOMEM;
129 	perf_evsel__reset_stat_priv(evsel);
130 	return 0;
131 }
132 
133 static void perf_evsel__free_stat_priv(struct evsel *evsel)
134 {
135 	struct perf_stat_evsel *ps = evsel->stats;
136 
137 	if (ps)
138 		zfree(&ps->group_data);
139 	zfree(&evsel->stats);
140 }
141 
142 static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
143 					     int ncpus, int nthreads)
144 {
145 	struct perf_counts *counts;
146 
147 	counts = perf_counts__new(ncpus, nthreads);
148 	if (counts)
149 		evsel->prev_raw_counts = counts;
150 
151 	return counts ? 0 : -ENOMEM;
152 }
153 
154 static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
155 {
156 	perf_counts__delete(evsel->prev_raw_counts);
157 	evsel->prev_raw_counts = NULL;
158 }
159 
160 static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
161 {
162 	int ncpus = perf_evsel__nr_cpus(evsel);
163 	int nthreads = perf_thread_map__nr(evsel->core.threads);
164 
165 	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
166 	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
167 	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
168 		return -ENOMEM;
169 
170 	return 0;
171 }
172 
173 int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
174 {
175 	struct evsel *evsel;
176 
177 	evlist__for_each_entry(evlist, evsel) {
178 		if (perf_evsel__alloc_stats(evsel, alloc_raw))
179 			goto out_free;
180 	}
181 
182 	return 0;
183 
184 out_free:
185 	perf_evlist__free_stats(evlist);
186 	return -1;
187 }
188 
189 void perf_evlist__free_stats(struct evlist *evlist)
190 {
191 	struct evsel *evsel;
192 
193 	evlist__for_each_entry(evlist, evsel) {
194 		perf_evsel__free_stat_priv(evsel);
195 		perf_evsel__free_counts(evsel);
196 		perf_evsel__free_prev_raw_counts(evsel);
197 	}
198 }
199 
200 void perf_evlist__reset_stats(struct evlist *evlist)
201 {
202 	struct evsel *evsel;
203 
204 	evlist__for_each_entry(evlist, evsel) {
205 		perf_evsel__reset_stat_priv(evsel);
206 		perf_evsel__reset_counts(evsel);
207 	}
208 }
209 
210 static void zero_per_pkg(struct evsel *counter)
211 {
212 	if (counter->per_pkg_mask)
213 		memset(counter->per_pkg_mask, 0, cpu__max_cpu());
214 }
215 
/*
 * For a per-package event, decide whether this cpu's reading must be
 * skipped because another cpu in the same package already contributed.
 * The per_pkg_mask bitmap is allocated lazily on first use.
 *
 * Returns 0 with *skip set appropriately, -ENOMEM on allocation failure,
 * or -1 if the socket for @cpu cannot be resolved.
 */
static int check_per_pkg(struct evsel *counter,
			 struct perf_counts_values *vals, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct perf_cpu_map *cpus = evsel__cpus(counter);
	int s;

	*skip = false;

	/* Nothing to dedupe for ordinary (non per-package) events. */
	if (!counter->per_pkg)
		return 0;

	if (perf_cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		/*
		 * NOTE(review): sized as cpu__max_cpu() *bytes* but indexed
		 * by socket id via bit ops below — verify socket ids always
		 * fit within that bit range.
		 */
		mask = zalloc(cpu__max_cpu());
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	/*
	 * we do not consider an event that has not run as a good
	 * instance to mark a package as used (skip=1). Otherwise
	 * we may run into a situation where the first CPU in a package
	 * is not running anything, yet the second is, and this function
	 * would mark the package as used after the first CPU and would
	 * not read the values from the second CPU.
	 */
	if (!(vals->run && vals->ena))
		return 0;

	s = cpu_map__get_socket(cpus, cpu, NULL);
	if (s < 0)
		return -1;

	/* First reading from this socket wins; later ones are skipped. */
	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}
257 
/*
 * Fold one (cpu, thread) raw reading into the evsel according to the
 * configured aggregation mode, updating shadow stats where applicable.
 * Returns 0 on success, -1 if the per-package check fails.
 */
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
		       int cpu, int thread,
		       struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, count, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	/* Duplicate per-package reading: contribute zeros instead. */
	if (skip)
		count = &zero;

	switch (config->aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
	case AGGR_DIE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		/* Snapshot counters carry absolute values; no delta needed. */
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, thread, count);
		perf_counts_values__scale(count, config->scale, NULL);
		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
			perf_stat__update_shadow_stats(evsel, count->val,
						       cpu, &rt_stat);
		}

		if (config->aggr_mode == AGGR_THREAD) {
			if (config->stats)
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &config->stats[thread]);
			else
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &rt_stat);
		}
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		aggr->ena += count->ena;
		aggr->run += count->run;
		/* fallthrough — GLOBAL shares the break below */
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}
309 
310 static int process_counter_maps(struct perf_stat_config *config,
311 				struct evsel *counter)
312 {
313 	int nthreads = perf_thread_map__nr(counter->core.threads);
314 	int ncpus = perf_evsel__nr_cpus(counter);
315 	int cpu, thread;
316 
317 	if (counter->system_wide)
318 		nthreads = 1;
319 
320 	for (thread = 0; thread < nthreads; thread++) {
321 		for (cpu = 0; cpu < ncpus; cpu++) {
322 			if (process_counter_values(config, counter, cpu, thread,
323 						   perf_counts(counter->counts, cpu, thread)))
324 				return -1;
325 		}
326 	}
327 
328 	return 0;
329 }
330 
/*
 * Post-process one counter after a read: reset the aggregate, fold in all
 * per-cpu/per-thread values, and — for AGGR_GLOBAL — compute deltas, scale,
 * update the res_stats running averages and the shadow stats.
 * Returns 0 on success or the error from process_counter_maps().
 */
int perf_stat_process_counter(struct perf_stat_config *config,
			      struct evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat_evsel *ps = counter->stats;
	u64 *count = counter->counts->aggr.values;
	int i, ret;

	aggr->val = aggr->ena = aggr->run = 0;

	/*
	 * We calculate counter's data every interval,
	 * and the display code shows ps->res_stats
	 * avg value. We need to zero the stats for
	 * interval mode, otherwise overall avg running
	 * averages will be shown for each interval.
	 */
	if (config->interval)
		init_stats(ps->res_stats);

	/* Fresh deduplication state for per-package counters. */
	if (counter->per_pkg)
		zero_per_pkg(counter);

	ret = process_counter_maps(config, counter);
	if (ret)
		return ret;

	/* Everything below only applies to global aggregation. */
	if (config->aggr_mode != AGGR_GLOBAL)
		return 0;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, -1, aggr);
	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

	/* Track running stats for each of val, ena and run. */
	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose > 0) {
		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

	return 0;
}
380 
381 int perf_event__process_stat_event(struct perf_session *session,
382 				   union perf_event *event)
383 {
384 	struct perf_counts_values count;
385 	struct perf_record_stat *st = &event->stat;
386 	struct evsel *counter;
387 
388 	count.val = st->val;
389 	count.ena = st->ena;
390 	count.run = st->run;
391 
392 	counter = perf_evlist__id2evsel(session->evlist, st->id);
393 	if (!counter) {
394 		pr_err("Failed to resolve counter for stat event.\n");
395 		return -EINVAL;
396 	}
397 
398 	*perf_counts(counter->counts, st->cpu, st->thread) = count;
399 	counter->supported = true;
400 	return 0;
401 }
402 
403 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
404 {
405 	struct perf_record_stat *st = (struct perf_record_stat *)event;
406 	size_t ret;
407 
408 	ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
409 		       st->id, st->cpu, st->thread);
410 	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
411 		       st->val, st->ena, st->run);
412 
413 	return ret;
414 }
415 
416 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
417 {
418 	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
419 	size_t ret;
420 
421 	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
422 		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
423 
424 	return ret;
425 }
426 
427 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
428 {
429 	struct perf_stat_config sc;
430 	size_t ret;
431 
432 	perf_event__read_stat_config(&sc, &event->stat_config);
433 
434 	ret  = fprintf(fp, "\n");
435 	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
436 	ret += fprintf(fp, "... scale     %d\n", sc.scale);
437 	ret += fprintf(fp, "... interval  %u\n", sc.interval);
438 
439 	return ret;
440 }
441 
/*
 * Configure @evsel's perf_event_attr for counting mode and open it, either
 * per-cpu or per-thread depending on @target.  Returns the result of the
 * perf_evsel__open_per_*() call.
 */
int create_perf_stat_counter(struct evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel->leader;

	/* Always read total enabled/running time for scaling. */
	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING;

	/*
	 * The event is part of non trivial group, let's enable
	 * the group read (for leader) and ID retrieval for all
	 * members.
	 */
	if (leader->core.nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !config->no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	if (config->identifier)
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(target) && !config->initial_delay)
			attr->enable_on_exec = 1;
	}

	/* CPU-oriented targets open per-cpu; everything else per-thread. */
	if (target__has_cpu(target) && !target__has_per_thread(target))
		return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}
492 
493 int perf_stat_synthesize_config(struct perf_stat_config *config,
494 				struct perf_tool *tool,
495 				struct evlist *evlist,
496 				perf_event__handler_t process,
497 				bool attrs)
498 {
499 	int err;
500 
501 	if (attrs) {
502 		err = perf_event__synthesize_attrs(tool, evlist, process);
503 		if (err < 0) {
504 			pr_err("Couldn't synthesize attrs.\n");
505 			return err;
506 		}
507 	}
508 
509 	err = perf_event__synthesize_extra_attr(tool, evlist, process,
510 						attrs);
511 
512 	err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
513 						 process, NULL);
514 	if (err < 0) {
515 		pr_err("Couldn't synthesize thread map.\n");
516 		return err;
517 	}
518 
519 	err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
520 					     process, NULL);
521 	if (err < 0) {
522 		pr_err("Couldn't synthesize thread map.\n");
523 		return err;
524 	}
525 
526 	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
527 	if (err < 0) {
528 		pr_err("Couldn't synthesize config.\n");
529 		return err;
530 	}
531 
532 	return 0;
533 }
534