xref: /linux/tools/bpf/bpftool/map_perf_ring.c (revision f412eed9dfdeeb6becd7de2ffe8b5d0a8b3f81ca)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2018 Netronome Systems, Inc. */
/* This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <errno.h>
#include <fcntl.h>
#include <libbpf.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>

#include <bpf.h>
#include <perf-sys.h>

#include "main.h"

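/* Number of data pages mapped per ring; perf requires a power-of-two
 * page count, and one extra control page is mapped in front of the data
 * (see perf_mmap_size() below).
 */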
#define MMAP_PAGE_CNT	16

static bool stop;

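/* Per-ring state: the perf event FD, the map slot (key) it was installed
 * in, the CPU it is bound to, and the mmap'ed ring buffer.
 */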
struct event_ring_info {
	int fd;
	int key;
	unsigned int cpu;
	void *mem;
};

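/* Layout of a PERF_RECORD_SAMPLE record carrying PERF_SAMPLE_RAW data,
 * i.e. what bpf_perf_event_output() produces: the common perf header,
 * the size of the raw payload, then the payload itself.
 */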
struct perf_event_sample {
	struct perf_event_header header;
	__u32 size;
	unsigned char data[];
};

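/* Handler for SIGINT/SIGHUP/SIGTERM; the main loop in do_event_pipe()
 * checks "stop" on every iteration and winds down cleanly.
 */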
static void int_exit(int signo)
{
	fprintf(stderr, "Stopping...\n");
	stop = true;
}

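/* Pretty-print one record, as JSON or plain text. PERF_RECORD_SAMPLE
 * records are dumped as hex, PERF_RECORD_LOST records report how many
 * events the kernel dropped because the ring was full.
 */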
static void
print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
{
	struct {
		struct perf_event_header header;
		__u64 id;
		__u64 lost;
	} *lost = (void *)e;
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
		perror("Can't read clock for timestamp");
		return;
	}

	if (json_output) {
		jsonw_start_object(json_wtr);
		jsonw_name(json_wtr, "timestamp");
		jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec);
		jsonw_name(json_wtr, "type");
		jsonw_uint(json_wtr, e->header.type);
		jsonw_name(json_wtr, "cpu");
		jsonw_uint(json_wtr, ring->cpu);
		jsonw_name(json_wtr, "index");
		jsonw_uint(json_wtr, ring->key);
		if (e->header.type == PERF_RECORD_SAMPLE) {
			jsonw_name(json_wtr, "data");
			print_data_json(e->data, e->size);
		} else if (e->header.type == PERF_RECORD_LOST) {
			jsonw_name(json_wtr, "lost");
			jsonw_start_object(json_wtr);
			jsonw_name(json_wtr, "id");
			jsonw_uint(json_wtr, lost->id);
			jsonw_name(json_wtr, "count");
			jsonw_uint(json_wtr, lost->lost);
			jsonw_end_object(json_wtr);
		}
		jsonw_end_object(json_wtr);
	} else {
		if (e->header.type == PERF_RECORD_SAMPLE) {
			printf("== @%ld.%ld CPU: %d index: %d =====\n",
			       (long)ts.tv_sec, ts.tv_nsec,
			       ring->cpu, ring->key);
			fprint_hex(stdout, e->data, e->size, " ");
			printf("\n");
		} else if (e->header.type == PERF_RECORD_LOST) {
			printf("lost %lld events\n", lost->lost);
		} else {
			printf("unknown event type=%d size=%d\n",
			       e->header.type, e->header.size);
		}
	}
}

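/* Drain every record currently in one ring. The kernel publishes new
 * records by advancing data_head; user space consumes them and then
 * stores the new data_tail, which makes the space reusable. *buf and
 * *buf_len form a caller-owned scratch buffer used to linearize records
 * that wrap around the end of the ring.
 */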
static void
perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
{
	volatile struct perf_event_mmap_page *header = ring->mem;
	__u64 buffer_size = MMAP_PAGE_CNT * get_page_size();
	__u64 data_tail = header->data_tail;
	__u64 data_head = header->data_head;
	void *base, *begin, *end;

	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
	if (data_head == data_tail)
		return;

	base = ((char *)header) + get_page_size();

	begin = base + data_tail % buffer_size;
	end = base + data_head % buffer_size;

	while (begin != end) {
		struct perf_event_sample *e;

		e = begin;
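		/* A record that wraps past the end of the ring cannot be
		 * handed out in place; stitch its two halves together in a
		 * heap buffer that grows on demand.
		 */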
		if (begin + e->header.size > base + buffer_size) {
			long len = base + buffer_size - begin;

			if (*buf_len < e->header.size) {
				free(*buf);
				*buf = malloc(e->header.size);
				if (!*buf) {
					fprintf(stderr,
						"can't allocate memory\n");
					stop = true;
					return;
				}
				*buf_len = e->header.size;
			}

			memcpy(*buf, begin, len);
			memcpy(*buf + len, base, e->header.size - len);
			e = (void *)*buf;
			begin = base + e->header.size - len;
		} else if (begin + e->header.size == base + buffer_size) {
			begin = base;
		} else {
			begin += e->header.size;
		}

		print_bpf_output(ring, e);
	}

	__sync_synchronize(); /* smp_mb() */
	header->data_tail = data_head;
}

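/* Total mmap size: one control page (struct perf_event_mmap_page)
 * followed by MMAP_PAGE_CNT data pages.
 */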
static int perf_mmap_size(void)
{
	return get_page_size() * (MMAP_PAGE_CNT + 1);
}

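/* Map the ring read-write: PROT_WRITE is needed so the data_tail update
 * in perf_event_read() reaches the kernel.
 */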
static void *perf_event_mmap(int fd)
{
	int mmap_size = perf_mmap_size();
	void *base;

	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (base == MAP_FAILED) {
		p_err("event mmap failed: %s", strerror(errno));
		return NULL;
	}

	return base;
}

static void perf_event_unmap(void *mem)
{
	if (munmap(mem, perf_mmap_size()))
		fprintf(stderr, "Can't unmap ring memory!\n");
}

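/* Open a PERF_COUNT_SW_BPF_OUTPUT event on one CPU, install its FD in
 * slot "key" of the perf event array (so that bpf_perf_event_output()
 * from a BPF program lands in this ring), then enable the event.
 */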
static int bpf_perf_event_open(int map_fd, int key, int cpu)
{
	struct perf_event_attr attr = {
		.sample_type = PERF_SAMPLE_RAW,
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_BPF_OUTPUT,
	};
	int pmu_fd;

	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
	if (pmu_fd < 0) {
		p_err("failed to open perf event %d for CPU %d", key, cpu);
		return -1;
	}

	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
		p_err("failed to update map for event %d for CPU %d", key, cpu);
		goto err_close;
	}
	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
		p_err("failed to enable event %d for CPU %d", key, cpu);
		goto err_close;
	}

	return pmu_fd;

err_close:
	close(pmu_fd);
	return -1;
}

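/* Entry point for "bpftool map event_pipe MAP [cpu CPU index INDEX]".
 * Without a cpu/index pair, one ring is opened per possible CPU (capped
 * by the map size); with a pair, a single ring is read.
 */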
int do_event_pipe(int argc, char **argv)
{
	int i, nfds, map_fd, index = -1, cpu = -1;
	struct bpf_map_info map_info = {};
	struct event_ring_info *rings;
	size_t tmp_buf_sz = 0;
	void *tmp_buf = NULL;
	struct pollfd *pfds;
	__u32 map_info_len;
	bool do_all = true;

	map_info_len = sizeof(map_info);
	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
	if (map_fd < 0)
		return -1;

	if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
		p_err("map is not a perf event array");
		goto err_close_map;
	}

	while (argc) {
		if (argc < 2)
			BAD_ARG();

		if (is_prefix(*argv, "cpu")) {
			char *endptr;

			NEXT_ARG();
			cpu = strtoul(*argv, &endptr, 0);
			if (*endptr) {
				p_err("can't parse %s as CPU ID", *argv);
				goto err_close_map;
			}

			NEXT_ARG();
		} else if (is_prefix(*argv, "index")) {
			char *endptr;

			NEXT_ARG();
			index = strtoul(*argv, &endptr, 0);
			if (*endptr) {
				p_err("can't parse %s as index", *argv);
				goto err_close_map;
			}

			NEXT_ARG();
		} else {
			BAD_ARG();
		}

		do_all = false;
	}

	if (!do_all) {
		if (index == -1 || cpu == -1) {
			p_err("cpu and index must be specified together");
			goto err_close_map;
		}

		nfds = 1;
	} else {
		nfds = min(get_possible_cpus(), map_info.max_entries);
		cpu = 0;
		index = 0;
	}

	rings = calloc(nfds, sizeof(rings[0]));
	if (!rings)
		goto err_close_map;

	pfds = calloc(nfds, sizeof(pfds[0]));
	if (!pfds)
		goto err_free_rings;

	for (i = 0; i < nfds; i++) {
		rings[i].cpu = cpu + i;
		rings[i].key = index + i;

		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
						  rings[i].cpu);
		if (rings[i].fd < 0)
			goto err_close_fds_prev;

		rings[i].mem = perf_event_mmap(rings[i].fd);
		if (!rings[i].mem)
			goto err_close_fds_current;

		pfds[i].fd = rings[i].fd;
		pfds[i].events = POLLIN;
	}

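	/* Install the handlers that set "stop"; Ctrl-C, a hangup or a
	 * polite kill all make the loop below exit.
	 */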
	signal(SIGINT, int_exit);
	signal(SIGHUP, int_exit);
	signal(SIGTERM, int_exit);

	if (json_output)
		jsonw_start_array(json_wtr);

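	/* The 200 ms poll timeout keeps the loop responsive to "stop"
	 * even when no ring has data.
	 */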
	while (!stop) {
		poll(pfds, nfds, 200);
		for (i = 0; i < nfds; i++)
			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
	}
	free(tmp_buf);

	if (json_output)
		jsonw_end_array(json_wtr);

	for (i = 0; i < nfds; i++) {
		perf_event_unmap(rings[i].mem);
		close(rings[i].fd);
	}
	free(pfds);
	free(rings);
	close(map_fd);

	return 0;

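	/* Unwind in reverse: entering at err_close_fds_current closes the
	 * FD of the ring whose mmap failed (it has nothing to unmap), then
	 * the loop unmaps and closes every ring set up before it.
	 */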
err_close_fds_prev:
	while (i--) {
		perf_event_unmap(rings[i].mem);
err_close_fds_current:
		close(rings[i].fd);
	}
	free(pfds);
err_free_rings:
	free(rings);
err_close_map:
	close(map_fd);
	return -1;
}