xref: /linux/tools/perf/util/evsel.c (revision d229807f669ba3dea9f64467ee965051c4366aed)
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9 
10 #include <byteswap.h>
11 #include "asm/bug.h"
12 #include "evsel.h"
13 #include "evlist.h"
14 #include "util.h"
15 #include "cpumap.h"
16 #include "thread_map.h"
17 
18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
19 
20 int __perf_evsel__sample_size(u64 sample_type)
21 {
22 	u64 mask = sample_type & PERF_SAMPLE_MASK;
23 	int size = 0;
24 	int i;
25 
26 	for (i = 0; i < 64; i++) {
27 		if (mask & (1ULL << i))
28 			size++;
29 	}
30 
31 	size *= sizeof(u64);
32 
33 	return size;
34 }
35 
36 void perf_evsel__init(struct perf_evsel *evsel,
37 		      struct perf_event_attr *attr, int idx)
38 {
39 	evsel->idx	   = idx;
40 	evsel->attr	   = *attr;
41 	INIT_LIST_HEAD(&evsel->node);
42 	hists__init(&evsel->hists);
43 }
44 
45 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
46 {
47 	struct perf_evsel *evsel = zalloc(sizeof(*evsel));
48 
49 	if (evsel != NULL)
50 		perf_evsel__init(evsel, attr, idx);
51 
52 	return evsel;
53 }
54 
55 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
56 {
57 	int cpu, thread;
58 	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
59 
60 	if (evsel->fd) {
61 		for (cpu = 0; cpu < ncpus; cpu++) {
62 			for (thread = 0; thread < nthreads; thread++) {
63 				FD(evsel, cpu, thread) = -1;
64 			}
65 		}
66 	}
67 
68 	return evsel->fd != NULL ? 0 : -ENOMEM;
69 }
70 
71 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
72 {
73 	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
74 	if (evsel->sample_id == NULL)
75 		return -ENOMEM;
76 
77 	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
78 	if (evsel->id == NULL) {
79 		xyarray__delete(evsel->sample_id);
80 		evsel->sample_id = NULL;
81 		return -ENOMEM;
82 	}
83 
84 	return 0;
85 }
86 
87 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
88 {
89 	evsel->counts = zalloc((sizeof(*evsel->counts) +
90 				(ncpus * sizeof(struct perf_counts_values))));
91 	return evsel->counts != NULL ? 0 : -ENOMEM;
92 }
93 
/* Release the fd matrix and reset the pointer so the evsel can be reused. */
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}
99 
100 void perf_evsel__free_id(struct perf_evsel *evsel)
101 {
102 	xyarray__delete(evsel->sample_id);
103 	evsel->sample_id = NULL;
104 	free(evsel->id);
105 	evsel->id = NULL;
106 }
107 
108 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
109 {
110 	int cpu, thread;
111 
112 	for (cpu = 0; cpu < ncpus; cpu++)
113 		for (thread = 0; thread < nthreads; ++thread) {
114 			close(FD(evsel, cpu, thread));
115 			FD(evsel, cpu, thread) = -1;
116 		}
117 }
118 
/*
 * Tear down everything the init/alloc helpers set up, but do NOT free
 * the evsel itself — that is perf_evsel__delete()'s job.  The evsel
 * must already have been unlinked from its evlist.
 *
 * NOTE(review): unlike perf_evsel__free_fd()/perf_evsel__free_id(),
 * the pointers are not reset to NULL here, so the evsel must not be
 * reused after this call.
 */
void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}
126 
127 void perf_evsel__delete(struct perf_evsel *evsel)
128 {
129 	perf_evsel__exit(evsel);
130 	close_cgroup(evsel->cgrp);
131 	free(evsel->name);
132 	free(evsel);
133 }
134 
135 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
136 			      int cpu, int thread, bool scale)
137 {
138 	struct perf_counts_values count;
139 	size_t nv = scale ? 3 : 1;
140 
141 	if (FD(evsel, cpu, thread) < 0)
142 		return -EINVAL;
143 
144 	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
145 		return -ENOMEM;
146 
147 	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
148 		return -errno;
149 
150 	if (scale) {
151 		if (count.run == 0)
152 			count.val = 0;
153 		else if (count.run < count.ena)
154 			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
155 	} else
156 		count.ena = count.run = 0;
157 
158 	evsel->counts->cpu[cpu] = count;
159 	return 0;
160 }
161 
162 int __perf_evsel__read(struct perf_evsel *evsel,
163 		       int ncpus, int nthreads, bool scale)
164 {
165 	size_t nv = scale ? 3 : 1;
166 	int cpu, thread;
167 	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
168 
169 	aggr->val = aggr->ena = aggr->run = 0;
170 
171 	for (cpu = 0; cpu < ncpus; cpu++) {
172 		for (thread = 0; thread < nthreads; thread++) {
173 			if (FD(evsel, cpu, thread) < 0)
174 				continue;
175 
176 			if (readn(FD(evsel, cpu, thread),
177 				  &count, nv * sizeof(u64)) < 0)
178 				return -errno;
179 
180 			aggr->val += count.val;
181 			if (scale) {
182 				aggr->ena += count.ena;
183 				aggr->run += count.run;
184 			}
185 		}
186 	}
187 
188 	evsel->counts->scaled = 0;
189 	if (scale) {
190 		if (aggr->run == 0) {
191 			evsel->counts->scaled = -1;
192 			aggr->val = 0;
193 			return 0;
194 		}
195 
196 		if (aggr->run < aggr->ena) {
197 			evsel->counts->scaled = 1;
198 			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
199 		}
200 	} else
201 		aggr->ena = aggr->run = 0;
202 
203 	return 0;
204 }
205 
/*
 * Open one perf event fd per (cpu, thread) pair via sys_perf_event_open().
 *
 * For cgroup events the cgroup fd is passed in the pid argument together
 * with PERF_FLAG_PID_CGROUP, so the per-thread pids are not used.  When
 * @group is true, the first fd opened on each cpu becomes the group
 * leader for the remaining threads on that cpu.
 *
 * On failure, every fd opened so far is closed and -1 is returned.
 */
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1;

	/* Lazily allocate the fd matrix on first open. */
	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -1;

	if (evsel->cgrp) {
		/* Cgroup mode: the cgroup fd rides in the pid argument. */
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0)
				goto out_close;

			/* First successful open on this cpu leads the group. */
			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	/*
	 * Unwind: first close the partially-filled row (thread holds the
	 * index of the slot that failed, so --thread starts at the last
	 * one actually opened), then reset thread to the row width and
	 * close every earlier, fully-opened row.
	 */
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return -1;
}
254 
/*
 * 1-entry dummy cpu map meaning "any cpu" (-1), substituted when the
 * caller supplies no cpu map.  The wrapper struct provides storage for
 * the single entry right after the map header — presumably cpu_map
 * ends in a flexible/trailing array; TODO confirm against cpumap.h.
 */
static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};
262 
/*
 * 1-entry dummy thread map meaning "any thread" (-1), substituted when
 * the caller supplies no thread map; same storage trick as
 * empty_cpu_map above.
 */
static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};
270 
271 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
272 		     struct thread_map *threads, bool group)
273 {
274 	if (cpus == NULL) {
275 		/* Work around old compiler warnings about strict aliasing */
276 		cpus = &empty_cpu_map.map;
277 	}
278 
279 	if (threads == NULL)
280 		threads = &empty_thread_map.map;
281 
282 	return __perf_evsel__open(evsel, cpus, threads, group);
283 }
284 
/* Open the event system-wide on each cpu in @cpus ("any thread"). */
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
}
290 
/* Open the event for each thread in @threads, on any cpu. */
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
}
296 
/*
 * Parse the sample_id_all trailer of a non-sample record.  The selected
 * fields are appended at the *end* of the record in sample order, so
 * walk backwards from the record's last u64 slot, peeling fields off in
 * reverse order: CPU, STREAM_ID, ID, TIME, then TID/PID.
 *
 * NOTE(review): unlike perf_event__parse_sample(), byte-swapped input
 * is not handled here — confirm whether callers guarantee native
 * endianness for these records.
 */
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	/* Point at the last u64 of the record body. */
	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		/* cpu is a u32 in the low half of the u64 slot. */
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		/* pid and tid share one u64 slot as two u32s. */
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}
334 
335 static bool sample_overlap(const union perf_event *event,
336 			   const void *offset, u64 size)
337 {
338 	const void *base = event;
339 
340 	if (offset + size > base + event->header.size)
341 		return true;
342 
343 	return false;
344 }
345 
346 int perf_event__parse_sample(const union perf_event *event, u64 type,
347 			     int sample_size, bool sample_id_all,
348 			     struct perf_sample *data, bool swapped)
349 {
350 	const u64 *array;
351 
352 	/*
353 	 * used for cross-endian analysis. See git commit 65014ab3
354 	 * for why this goofiness is needed.
355 	 */
356 	union {
357 		u64 val64;
358 		u32 val32[2];
359 	} u;
360 
361 
362 	data->cpu = data->pid = data->tid = -1;
363 	data->stream_id = data->id = data->time = -1ULL;
364 
365 	if (event->header.type != PERF_RECORD_SAMPLE) {
366 		if (!sample_id_all)
367 			return 0;
368 		return perf_event__parse_id_sample(event, type, data);
369 	}
370 
371 	array = event->sample.array;
372 
373 	if (sample_size + sizeof(event->header) > event->header.size)
374 		return -EFAULT;
375 
376 	if (type & PERF_SAMPLE_IP) {
377 		data->ip = event->ip.ip;
378 		array++;
379 	}
380 
381 	if (type & PERF_SAMPLE_TID) {
382 		u.val64 = *array;
383 		if (swapped) {
384 			/* undo swap of u64, then swap on individual u32s */
385 			u.val64 = bswap_64(u.val64);
386 			u.val32[0] = bswap_32(u.val32[0]);
387 			u.val32[1] = bswap_32(u.val32[1]);
388 		}
389 
390 		data->pid = u.val32[0];
391 		data->tid = u.val32[1];
392 		array++;
393 	}
394 
395 	if (type & PERF_SAMPLE_TIME) {
396 		data->time = *array;
397 		array++;
398 	}
399 
400 	data->addr = 0;
401 	if (type & PERF_SAMPLE_ADDR) {
402 		data->addr = *array;
403 		array++;
404 	}
405 
406 	data->id = -1ULL;
407 	if (type & PERF_SAMPLE_ID) {
408 		data->id = *array;
409 		array++;
410 	}
411 
412 	if (type & PERF_SAMPLE_STREAM_ID) {
413 		data->stream_id = *array;
414 		array++;
415 	}
416 
417 	if (type & PERF_SAMPLE_CPU) {
418 
419 		u.val64 = *array;
420 		if (swapped) {
421 			/* undo swap of u64, then swap on individual u32s */
422 			u.val64 = bswap_64(u.val64);
423 			u.val32[0] = bswap_32(u.val32[0]);
424 		}
425 
426 		data->cpu = u.val32[0];
427 		array++;
428 	}
429 
430 	if (type & PERF_SAMPLE_PERIOD) {
431 		data->period = *array;
432 		array++;
433 	}
434 
435 	if (type & PERF_SAMPLE_READ) {
436 		fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
437 		return -1;
438 	}
439 
440 	if (type & PERF_SAMPLE_CALLCHAIN) {
441 		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
442 			return -EFAULT;
443 
444 		data->callchain = (struct ip_callchain *)array;
445 
446 		if (sample_overlap(event, array, data->callchain->nr))
447 			return -EFAULT;
448 
449 		array += 1 + data->callchain->nr;
450 	}
451 
452 	if (type & PERF_SAMPLE_RAW) {
453 		const u64 *pdata;
454 
455 		u.val64 = *array;
456 		if (WARN_ONCE(swapped,
457 			      "Endianness of raw data not corrected!\n")) {
458 			/* undo swap of u64, then swap on individual u32s */
459 			u.val64 = bswap_64(u.val64);
460 			u.val32[0] = bswap_32(u.val32[0]);
461 			u.val32[1] = bswap_32(u.val32[1]);
462 		}
463 
464 		if (sample_overlap(event, array, sizeof(u32)))
465 			return -EFAULT;
466 
467 		data->raw_size = u.val32[0];
468 		pdata = (void *) array + sizeof(u32);
469 
470 		if (sample_overlap(event, pdata, data->raw_size))
471 			return -EFAULT;
472 
473 		data->raw_data = (void *) pdata;
474 	}
475 
476 	return 0;
477 }
478