xref: /linux/tools/testing/selftests/bpf/xdp_hw_metadata.c (revision 460e462d22542adfafd8a5bc979437df73f1cbf3)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Reference program for verifying XDP metadata on real HW. Functional test
4  * only, doesn't test the performance.
5  *
6  * RX:
7  * - UDP 9091 packets are diverted into AF_XDP
8  * - Metadata verified:
9  *   - rx_timestamp
10  *   - rx_hash
11  *
12  * TX:
13  * - TBD
14  */
15 
16 #include <test_progs.h>
17 #include <network_helpers.h>
18 #include "xdp_hw_metadata.skel.h"
19 #include "xsk.h"
20 
21 #include <error.h>
22 #include <linux/errqueue.h>
23 #include <linux/if_link.h>
24 #include <linux/net_tstamp.h>
25 #include <linux/udp.h>
26 #include <linux/sockios.h>
27 #include <sys/mman.h>
28 #include <net/if.h>
29 #include <ctype.h>
30 #include <poll.h>
31 #include <time.h>
32 
33 #include "xdp_metadata.h"
34 
/* Number of frames in each socket's UMEM area. */
#define UMEM_NUM 16
/* Size of one UMEM frame; uses the xsk library default. */
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/* Bytes mapped per socket: UMEM_NUM frames of UMEM_FRAME_SIZE each. */
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags passed to bpf_xdp_attach() in main() and bpf_xdp_detach() in cleanup(). */
#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
39 
/* Per-queue AF_XDP state: one UMEM mapping plus the rings and socket that
 * open_xsk() creates on top of it and close_xsk() tears down.
 */
struct xsk {
	void *umem_area;		/* mmap()ed backing memory, UMEM_SIZE bytes */
	struct xsk_umem *umem;		/* handle filled in by xsk_umem__create() */
	struct xsk_ring_prod fill;	/* fill ring; RX frame addresses are posted here */
	struct xsk_ring_cons comp;	/* completion ring handed to xsk_umem__create() */
	struct xsk_ring_prod tx;	/* TX ring handed to xsk_socket__create() */
	struct xsk_ring_cons rx;	/* RX ring consumed by verify_metadata() */
	struct xsk_socket *socket;	/* handle filled in by xsk_socket__create() */
};
49 
/* BPF skeleton; opened and loaded in main(), destroyed in cleanup(). */
struct xdp_hw_metadata *bpf_obj;
/* AF_XDP bind flags; read_args() ORs in XDP_USE_SG when -m is given. */
__u16 bind_flags = XDP_COPY;
/* Array of rxq entries, one per RX queue; allocated in main(). */
struct xsk *rx_xsk;
/* Interface name taken from the command line by read_args(). */
const char *ifname;
/* Result of if_nametoindex(ifname), set by read_args(). */
int ifindex;
/* Number of RX queues as returned by rxq_num(). */
int rxq;

void test__fail(void) { /* for network_helpers.c */ }
58 
59 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
60 {
61 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
62 	const struct xsk_socket_config socket_config = {
63 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
64 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
65 		.bind_flags = bind_flags,
66 	};
67 	const struct xsk_umem_config umem_config = {
68 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
69 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
70 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
71 		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
72 	};
73 	__u32 idx;
74 	u64 addr;
75 	int ret;
76 	int i;
77 
78 	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
79 	if (xsk->umem_area == MAP_FAILED)
80 		return -ENOMEM;
81 
82 	ret = xsk_umem__create(&xsk->umem,
83 			       xsk->umem_area, UMEM_SIZE,
84 			       &xsk->fill,
85 			       &xsk->comp,
86 			       &umem_config);
87 	if (ret)
88 		return ret;
89 
90 	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
91 				 xsk->umem,
92 				 &xsk->rx,
93 				 &xsk->tx,
94 				 &socket_config);
95 	if (ret)
96 		return ret;
97 
98 	/* First half of umem is for TX. This way address matches 1-to-1
99 	 * to the completion queue index.
100 	 */
101 
102 	for (i = 0; i < UMEM_NUM / 2; i++) {
103 		addr = i * UMEM_FRAME_SIZE;
104 		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
105 	}
106 
107 	/* Second half of umem is for RX. */
108 
109 	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
110 	for (i = 0; i < UMEM_NUM / 2; i++) {
111 		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
112 		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
113 		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
114 	}
115 	xsk_ring_prod__submit(&xsk->fill, ret);
116 
117 	return 0;
118 }
119 
120 static void close_xsk(struct xsk *xsk)
121 {
122 	if (xsk->umem)
123 		xsk_umem__delete(xsk->umem);
124 	if (xsk->socket)
125 		xsk_socket__delete(xsk->socket);
126 	munmap(xsk->umem_area, UMEM_SIZE);
127 }
128 
129 static void refill_rx(struct xsk *xsk, __u64 addr)
130 {
131 	__u32 idx;
132 
133 	if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
134 		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
135 		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
136 		xsk_ring_prod__submit(&xsk->fill, 1);
137 	}
138 }
139 
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */

/* Return the current value of @clock_id in nanoseconds.
 * See man clock_gettime(2) for the available clock ids. A failing
 * clock_gettime() is reported through error(), which exits the program.
 */
static __u64 gettime(clockid_t clock_id)
{
	struct timespec now;
	int rc = clock_gettime(clock_id, &now);
	__u64 nsec;

	if (rc < 0)
		error(rc, errno, "Error with clock_gettime()");

	nsec = (__u64) now.tv_sec * NANOSEC_PER_SEC;
	return nsec + now.tv_nsec;
}
154 
155 static void verify_xdp_metadata(void *data, clockid_t clock_id)
156 {
157 	struct xdp_meta *meta;
158 
159 	meta = data - sizeof(*meta);
160 
161 	if (meta->rx_hash_err < 0)
162 		printf("No rx_hash err=%d\n", meta->rx_hash_err);
163 	else
164 		printf("rx_hash: 0x%X with RSS type:0x%X\n",
165 		       meta->rx_hash, meta->rx_hash_type);
166 
167 	printf("rx_timestamp:  %llu (sec:%0.4f)\n", meta->rx_timestamp,
168 	       (double)meta->rx_timestamp / NANOSEC_PER_SEC);
169 	if (meta->rx_timestamp) {
170 		__u64 usr_clock = gettime(clock_id);
171 		__u64 xdp_clock = meta->xdp_timestamp;
172 		__s64 delta_X = xdp_clock - meta->rx_timestamp;
173 		__s64 delta_X2U = usr_clock - xdp_clock;
174 
175 		printf("XDP RX-time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
176 		       xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
177 		       (double)delta_X / NANOSEC_PER_SEC,
178 		       (double)delta_X / 1000);
179 
180 		printf("AF_XDP time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
181 		       usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
182 		       (double)delta_X2U / NANOSEC_PER_SEC,
183 		       (double)delta_X2U / 1000);
184 	}
185 
186 }
187 
188 static void verify_skb_metadata(int fd)
189 {
190 	char cmsg_buf[1024];
191 	char packet_buf[128];
192 
193 	struct scm_timestamping *ts;
194 	struct iovec packet_iov;
195 	struct cmsghdr *cmsg;
196 	struct msghdr hdr;
197 
198 	memset(&hdr, 0, sizeof(hdr));
199 	hdr.msg_iov = &packet_iov;
200 	hdr.msg_iovlen = 1;
201 	packet_iov.iov_base = packet_buf;
202 	packet_iov.iov_len = sizeof(packet_buf);
203 
204 	hdr.msg_control = cmsg_buf;
205 	hdr.msg_controllen = sizeof(cmsg_buf);
206 
207 	if (recvmsg(fd, &hdr, 0) < 0)
208 		error(1, errno, "recvmsg");
209 
210 	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
211 	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
212 
213 		if (cmsg->cmsg_level != SOL_SOCKET)
214 			continue;
215 
216 		switch (cmsg->cmsg_type) {
217 		case SCM_TIMESTAMPING:
218 			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
219 			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
220 				printf("found skb hwtstamp = %lu.%lu\n",
221 				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
222 				return;
223 			}
224 			break;
225 		default:
226 			break;
227 		}
228 	}
229 
230 	printf("skb hwtstamp is not found!\n");
231 }
232 
233 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
234 {
235 	const struct xdp_desc *rx_desc;
236 	struct pollfd fds[rxq + 1];
237 	__u64 comp_addr;
238 	__u64 addr;
239 	__u32 idx = 0;
240 	int ret;
241 	int i;
242 
243 	for (i = 0; i < rxq; i++) {
244 		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
245 		fds[i].events = POLLIN;
246 		fds[i].revents = 0;
247 	}
248 
249 	fds[rxq].fd = server_fd;
250 	fds[rxq].events = POLLIN;
251 	fds[rxq].revents = 0;
252 
253 	while (true) {
254 		errno = 0;
255 		ret = poll(fds, rxq + 1, 1000);
256 		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
257 		       ret, errno, bpf_obj->bss->pkts_skip,
258 		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
259 		if (ret < 0)
260 			break;
261 		if (ret == 0)
262 			continue;
263 
264 		if (fds[rxq].revents)
265 			verify_skb_metadata(server_fd);
266 
267 		for (i = 0; i < rxq; i++) {
268 			bool first_seg = true;
269 			bool is_eop = true;
270 
271 			if (fds[i].revents == 0)
272 				continue;
273 
274 			struct xsk *xsk = &rx_xsk[i];
275 peek:
276 			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
277 			printf("xsk_ring_cons__peek: %d\n", ret);
278 			if (ret != 1)
279 				continue;
280 
281 			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
282 			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
283 			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
284 			is_eop = !(rx_desc->options & XDP_PKT_CONTD);
285 			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
286 			       xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
287 			if (first_seg) {
288 				verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
289 						    clock_id);
290 				first_seg = false;
291 			}
292 
293 			xsk_ring_cons__release(&xsk->rx, 1);
294 			refill_rx(xsk, comp_addr);
295 			if (!is_eop)
296 				goto peek;
297 		}
298 	}
299 
300 	return 0;
301 }
302 
/* Argument block for the SIOCETHTOOL ioctl issued by rxq_num(): @cmd is set
 * to ETHTOOL_GCHANNELS and, of the returned fields, only rx_count and
 * combined_count are read by this program.
 * NOTE(review): presumably mirrors the kernel's struct of the same name from
 * <linux/ethtool.h>, declared locally to avoid that include - confirm.
 */
struct ethtool_channels {
	__u32	cmd;
	__u32	max_rx;
	__u32	max_tx;
	__u32	max_other;
	__u32	max_combined;
	__u32	rx_count;
	__u32	tx_count;
	__u32	other_count;
	__u32	combined_count;
};

#define ETHTOOL_GCHANNELS	0x0000003c /* Get no of channels */
316 
317 static int rxq_num(const char *ifname)
318 {
319 	struct ethtool_channels ch = {
320 		.cmd = ETHTOOL_GCHANNELS,
321 	};
322 
323 	struct ifreq ifr = {
324 		.ifr_data = (void *)&ch,
325 	};
326 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
327 	int fd, ret;
328 
329 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
330 	if (fd < 0)
331 		error(1, errno, "socket");
332 
333 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
334 	if (ret < 0)
335 		error(1, errno, "ioctl(SIOCETHTOOL)");
336 
337 	close(fd);
338 
339 	return ch.rx_count + ch.combined_count;
340 }
341 
342 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
343 {
344 	struct ifreq ifr = {
345 		.ifr_data = (void *)cfg,
346 	};
347 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
348 	int fd, ret;
349 
350 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
351 	if (fd < 0)
352 		error(1, errno, "socket");
353 
354 	ret = ioctl(fd, op, &ifr);
355 	if (ret < 0)
356 		error(1, errno, "ioctl(%d)", op);
357 
358 	close(fd);
359 }
360 
361 static struct hwtstamp_config saved_hwtstamp_cfg;
362 static const char *saved_hwtstamp_ifname;
363 
364 static void hwtstamp_restore(void)
365 {
366 	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
367 }
368 
369 static void hwtstamp_enable(const char *ifname)
370 {
371 	struct hwtstamp_config cfg = {
372 		.rx_filter = HWTSTAMP_FILTER_ALL,
373 	};
374 
375 	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
376 	saved_hwtstamp_ifname = strdup(ifname);
377 	atexit(hwtstamp_restore);
378 
379 	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
380 }
381 
382 static void cleanup(void)
383 {
384 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
385 	int ret;
386 	int i;
387 
388 	if (bpf_obj) {
389 		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
390 		if (opts.old_prog_fd >= 0) {
391 			printf("detaching bpf program....\n");
392 			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
393 			if (ret)
394 				printf("failed to detach XDP program: %d\n", ret);
395 		}
396 	}
397 
398 	for (i = 0; i < rxq; i++)
399 		close_xsk(&rx_xsk[i]);
400 
401 	if (bpf_obj)
402 		xdp_hw_metadata__destroy(bpf_obj);
403 }
404 
/* SIGINT handler installed by main(). Deliberately empty: the handler only
 * has to exist so that the signal interrupts poll() in verify_metadata(),
 * whose negative return value ends the receive loop and lets main() run
 * cleanup().
 */
static void handle_signal(int sig)
{
	/* interrupting poll() is all we need */
}
409 
/* Set the SO_TIMESTAMPING option of socket @fd to the flag mask @val.
 * A failing setsockopt() is fatal.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
418 
/* Write the command line help text to stdout. */
static void print_usage(void)
{
	static const char usage[] =
		"Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
		"  -m    Enable multi-buffer XDP for larger MTU\n"
		"  -h    Display this help and exit\n\n"
		"Generate test packets on the other machine with:\n"
		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";

	fputs(usage, stdout);
}
430 
431 static void read_args(int argc, char *argv[])
432 {
433 	int opt;
434 
435 	while ((opt = getopt(argc, argv, "mh")) != -1) {
436 		switch (opt) {
437 		case 'm':
438 			bind_flags |= XDP_USE_SG;
439 			break;
440 		case 'h':
441 			print_usage();
442 			exit(0);
443 		case '?':
444 			if (isprint(optopt))
445 				fprintf(stderr, "Unknown option: -%c\n", optopt);
446 			fallthrough;
447 		default:
448 			print_usage();
449 			error(-1, opterr, "Command line options error");
450 		}
451 	}
452 
453 	if (optind >= argc) {
454 		fprintf(stderr, "No device name provided\n");
455 		print_usage();
456 		exit(-1);
457 	}
458 
459 	ifname = argv[optind];
460 	ifindex = if_nametoindex(ifname);
461 
462 	if (!ifname)
463 		error(-1, errno, "Invalid interface name");
464 }
465 
466 int main(int argc, char *argv[])
467 {
468 	clockid_t clock_id = CLOCK_TAI;
469 	int server_fd = -1;
470 	int ret;
471 	int i;
472 
473 	struct bpf_program *prog;
474 
475 	read_args(argc, argv);
476 
477 	rxq = rxq_num(ifname);
478 
479 	printf("rxq: %d\n", rxq);
480 
481 	hwtstamp_enable(ifname);
482 
483 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
484 	if (!rx_xsk)
485 		error(1, ENOMEM, "malloc");
486 
487 	for (i = 0; i < rxq; i++) {
488 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
489 		ret = open_xsk(ifindex, &rx_xsk[i], i);
490 		if (ret)
491 			error(1, -ret, "open_xsk");
492 
493 		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
494 	}
495 
496 	printf("open bpf program...\n");
497 	bpf_obj = xdp_hw_metadata__open();
498 	if (libbpf_get_error(bpf_obj))
499 		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
500 
501 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
502 	bpf_program__set_ifindex(prog, ifindex);
503 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
504 
505 	printf("load bpf program...\n");
506 	ret = xdp_hw_metadata__load(bpf_obj);
507 	if (ret)
508 		error(1, -ret, "xdp_hw_metadata__load");
509 
510 	printf("prepare skb endpoint...\n");
511 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
512 	if (server_fd < 0)
513 		error(1, errno, "start_server");
514 	timestamping_enable(server_fd,
515 			    SOF_TIMESTAMPING_SOFTWARE |
516 			    SOF_TIMESTAMPING_RAW_HARDWARE);
517 
518 	printf("prepare xsk map...\n");
519 	for (i = 0; i < rxq; i++) {
520 		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
521 		__u32 queue_id = i;
522 
523 		printf("map[%d] = %d\n", queue_id, sock_fd);
524 		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
525 		if (ret)
526 			error(1, -ret, "bpf_map_update_elem");
527 	}
528 
529 	printf("attach bpf program...\n");
530 	ret = bpf_xdp_attach(ifindex,
531 			     bpf_program__fd(bpf_obj->progs.rx),
532 			     XDP_FLAGS, NULL);
533 	if (ret)
534 		error(1, -ret, "bpf_xdp_attach");
535 
536 	signal(SIGINT, handle_signal);
537 	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
538 	close(server_fd);
539 	cleanup();
540 	if (ret)
541 		error(1, -ret, "verify_metadata");
542 }
543