xref: /linux/tools/testing/selftests/bpf/xdp_hw_metadata.c (revision 621cde16e49b3ecf7d59a8106a20aaebfb4a59a9)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Reference program for verifying XDP metadata on real HW. Functional test
4  * only, doesn't test the performance.
5  *
6  * RX:
7  * - UDP 9091 packets are diverted into AF_XDP
8  * - Metadata verified:
9  *   - rx_timestamp
10  *   - rx_hash
11  *
12  * TX:
13  * - UDP 9091 packets trigger TX reply
14  * - TX HW timestamp is requested and reported back upon completion
15  * - TX checksum is requested
16  */
17 
18 #include <test_progs.h>
19 #include <network_helpers.h>
20 #include "xdp_hw_metadata.skel.h"
21 #include "xsk.h"
22 
23 #include <error.h>
24 #include <linux/kernel.h>
25 #include <linux/bits.h>
26 #include <linux/bitfield.h>
27 #include <linux/errqueue.h>
28 #include <linux/if_link.h>
29 #include <linux/net_tstamp.h>
30 #include <linux/udp.h>
31 #include <linux/sockios.h>
32 #include <linux/if_xdp.h>
33 #include <sys/mman.h>
34 #include <net/if.h>
35 #include <ctype.h>
36 #include <poll.h>
37 #include <time.h>
38 #include <unistd.h>
39 #include <libgen.h>
40 
41 #include "xdp_metadata.h"
42 
43 #define UMEM_NUM 256
44 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
45 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
46 #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
47 
/* Per-queue AF_XDP state: the UMEM backing area, the UMEM handle with its
 * fill/completion rings, and the socket with its tx/rx rings.
 */
struct xsk {
	void *umem_area;		/* mmap()ed packet memory, UMEM_SIZE bytes */
	struct xsk_umem *umem;		/* handle filled in by xsk_umem__create() */
	struct xsk_ring_prod fill;	/* fill ring handed to xsk_umem__create() */
	struct xsk_ring_cons comp;	/* completion ring handed to xsk_umem__create() */
	struct xsk_ring_prod tx;	/* tx ring handed to xsk_socket__create() */
	struct xsk_ring_cons rx;	/* rx ring handed to xsk_socket__create() */
	struct xsk_socket *socket;	/* handle filled in by xsk_socket__create() */
};
57 
/* Loaded BPF skeleton; NULL until main() opens it. */
struct xdp_hw_metadata *bpf_obj;
/* AF_XDP bind flags; adjusted by the -c and -m command line options. */
__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
/* Array of rxq per-queue AF_XDP sockets. */
struct xsk *rx_xsk;
/* Interface name taken from the command line and its index. */
const char *ifname;
int ifindex;
/* Number of queues as reported by rxq_num(). */
int rxq;
/* Set by -r: do not send an AF_XDP reply. */
bool skip_tx;
/* RX timestamps of the last packet, kept to print deltas at TX completion. */
__u64 last_hw_rx_timestamp;
__u64 last_xdp_rx_timestamp;
67 
/* Stub needed by network_helpers.c; deliberately does nothing. */
void test__fail(void)
{
}
69 
/* Create the UMEM and the AF_XDP socket for one queue and prime the fill ring.
 *
 * @ifindex:  index of the interface to bind the socket to
 * @xsk:      per-queue state to populate
 * @queue_id: queue the socket is bound to
 *
 * Returns 0 on success or a negative error code. On failure everything that
 * was set up by this call is released again.
 */
static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
{
	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
	const struct xsk_socket_config socket_config = {
		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = bind_flags,
	};
	const struct xsk_umem_config umem_config = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
		.flags = XSK_UMEM__DEFAULT_FLAGS,
		.tx_metadata_len = sizeof(struct xsk_tx_metadata),
	};
	__u32 idx = 0;
	u64 addr;
	int ret;
	int i;

	/* Start from a known state so the error paths can tell what exists. */
	xsk->umem = NULL;
	xsk->socket = NULL;

	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
	if (xsk->umem_area == MAP_FAILED)
		return -ENOMEM;

	ret = xsk_umem__create(&xsk->umem,
			       xsk->umem_area, UMEM_SIZE,
			       &xsk->fill,
			       &xsk->comp,
			       &umem_config);
	if (ret)
		goto err_unmap;

	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
				 xsk->umem,
				 &xsk->rx,
				 &xsk->tx,
				 &socket_config);
	if (ret)
		goto err_umem;

	/* First half of umem is for TX. This way address matches 1-to-1
	 * to the completion queue index.
	 */

	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = i * UMEM_FRAME_SIZE;
		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
	}

	/* Second half of umem is for RX. */

	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
	if (ret != UMEM_NUM / 2) {
		/* Without a full reservation the slots below are not ours to write. */
		ret = -ENOSPC;
		goto err_socket;
	}

	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
		*xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr;
	}
	xsk_ring_prod__submit(&xsk->fill, ret);

	return 0;

err_socket:
	/* The socket goes first: it still references the umem. */
	xsk_socket__delete(xsk->socket);
	xsk->socket = NULL;
err_umem:
	xsk_umem__delete(xsk->umem);
	xsk->umem = NULL;
err_unmap:
	munmap(xsk->umem_area, UMEM_SIZE);
	xsk->umem_area = NULL;
	return ret;
}
131 
close_xsk(struct xsk * xsk)132 static void close_xsk(struct xsk *xsk)
133 {
134 	if (xsk->umem)
135 		xsk_umem__delete(xsk->umem);
136 	if (xsk->socket)
137 		xsk_socket__delete(xsk->socket);
138 	munmap(xsk->umem_area, UMEM_SIZE);
139 }
140 
/* Hand the frame at @addr back to the fill ring of @xsk.
 * If no slot can be reserved, nothing is queued.
 */
static void refill_rx(struct xsk *xsk, __u64 addr)
{
	__u32 slot;

	if (xsk_ring_prod__reserve(&xsk->fill, 1, &slot) != 1)
		return;

	printf("%p: complete rx idx=%u addr=%llx\n", xsk, slot, addr);
	*xsk_ring_prod__fill_addr(&xsk->fill, slot) = addr;
	xsk_ring_prod__submit(&xsk->fill, 1);
}
151 
kick_tx(struct xsk * xsk)152 static int kick_tx(struct xsk *xsk)
153 {
154 	return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
155 }
156 
kick_rx(struct xsk * xsk)157 static int kick_rx(struct xsk *xsk)
158 {
159 	return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
160 }
161 
162 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */
gettime(clockid_t clock_id)163 static __u64 gettime(clockid_t clock_id)
164 {
165 	struct timespec t;
166 	int res;
167 
168 	/* See man clock_gettime(2) for type of clock_id's */
169 	res = clock_gettime(clock_id, &t);
170 
171 	if (res < 0)
172 		error(res, errno, "Error with clock_gettime()");
173 
174 	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
175 }
176 
print_tstamp_delta(const char * name,const char * refname,__u64 tstamp,__u64 reference)177 static void print_tstamp_delta(const char *name, const char *refname,
178 			       __u64 tstamp, __u64 reference)
179 {
180 	__s64 delta = (__s64)reference - (__s64)tstamp;
181 
182 	printf("%s:   %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n",
183 	       name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname,
184 	       (double)delta / NANOSEC_PER_SEC,
185 	       (double)delta / 1000);
186 }
187 
#define VLAN_PRIO_MASK		GENMASK(15, 13) /* Priority Code Point */
#define VLAN_DEI_MASK		GENMASK(12, 12) /* Drop Eligible Indicator */
#define VLAN_VID_MASK		GENMASK(11, 0)	/* VLAN Identifier */

/* Split the VLAN TCI @tag into PCP, DEI and VID and print them on one line. */
static void print_vlan_tci(__u16 tag)
{
	const bool drop_eligible = FIELD_GET(VLAN_DEI_MASK, tag);
	const __u8 priority = FIELD_GET(VLAN_PRIO_MASK, tag);
	const __u16 vid = FIELD_GET(VLAN_VID_MASK, tag);

	printf("PCP=%u, DEI=%d, VID=0x%X\n", priority, drop_eligible, vid);
}
199 
verify_xdp_metadata(void * data,clockid_t clock_id)200 static void verify_xdp_metadata(void *data, clockid_t clock_id)
201 {
202 	struct xdp_meta *meta;
203 
204 	meta = data - sizeof(*meta);
205 
206 	if (meta->hint_valid & XDP_META_FIELD_RSS)
207 		printf("rx_hash: 0x%X with RSS type:0x%X\n",
208 		       meta->rx_hash, meta->rx_hash_type);
209 	else
210 		printf("No rx_hash, err=%d\n", meta->rx_hash_err);
211 
212 	if (meta->hint_valid & XDP_META_FIELD_TS) {
213 		__u64 ref_tstamp = gettime(clock_id);
214 
215 		/* store received timestamps to calculate a delta at tx */
216 		last_hw_rx_timestamp = meta->rx_timestamp;
217 		last_xdp_rx_timestamp = meta->xdp_timestamp;
218 
219 		print_tstamp_delta("HW RX-time", "User RX-time",
220 				   meta->rx_timestamp, ref_tstamp);
221 		print_tstamp_delta("XDP RX-time", "User RX-time",
222 				   meta->xdp_timestamp, ref_tstamp);
223 	} else {
224 		printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err);
225 	}
226 
227 	if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) {
228 		printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto));
229 		printf("rx_vlan_tci: ");
230 		print_vlan_tci(meta->rx_vlan_tci);
231 	} else {
232 		printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n",
233 		       meta->rx_vlan_tag_err);
234 	}
235 }
236 
verify_skb_metadata(int fd)237 static void verify_skb_metadata(int fd)
238 {
239 	char cmsg_buf[1024];
240 	char packet_buf[128];
241 
242 	struct scm_timestamping *ts;
243 	struct iovec packet_iov;
244 	struct cmsghdr *cmsg;
245 	struct msghdr hdr;
246 
247 	memset(&hdr, 0, sizeof(hdr));
248 	hdr.msg_iov = &packet_iov;
249 	hdr.msg_iovlen = 1;
250 	packet_iov.iov_base = packet_buf;
251 	packet_iov.iov_len = sizeof(packet_buf);
252 
253 	hdr.msg_control = cmsg_buf;
254 	hdr.msg_controllen = sizeof(cmsg_buf);
255 
256 	if (recvmsg(fd, &hdr, 0) < 0)
257 		error(1, errno, "recvmsg");
258 
259 	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
260 	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
261 
262 		if (cmsg->cmsg_level != SOL_SOCKET)
263 			continue;
264 
265 		switch (cmsg->cmsg_type) {
266 		case SCM_TIMESTAMPING:
267 			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
268 			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
269 				printf("found skb hwtstamp = %lu.%lu\n",
270 				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
271 				return;
272 			}
273 			break;
274 		default:
275 			break;
276 		}
277 	}
278 
279 	printf("skb hwtstamp is not found!\n");
280 }
281 
/* Consume one entry from the completion ring of @xsk, if there is one, and
 * print the TX timestamp found in the metadata in front of the frame.
 *
 * Returns true if a completion was consumed, false if the ring was empty.
 */
static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
{
	struct xsk_tx_metadata *tx_meta;
	__u64 frame_addr;
	__u32 comp_idx;
	void *frame;

	if (xsk_ring_cons__peek(&xsk->comp, 1, &comp_idx) == 0)
		return false;

	frame_addr = *xsk_ring_cons__comp_addr(&xsk->comp, comp_idx);
	frame = xsk_umem__get_data(xsk->umem_area, frame_addr);
	tx_meta = (struct xsk_tx_metadata *)frame - 1;

	printf("%p: complete tx idx=%u addr=%llx\n", xsk, comp_idx, frame_addr);

	if (!tx_meta->completion.tx_timestamp) {
		printf("No tx_timestamp\n");
	} else {
		__u64 now = gettime(clock_id);

		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
				   tx_meta->completion.tx_timestamp, now);
		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
				   last_xdp_rx_timestamp, now);
		print_tstamp_delta("HW RX-time", "HW TX-complete-time",
				   last_hw_rx_timestamp,
				   tx_meta->completion.tx_timestamp);
	}

	xsk_ring_cons__release(&xsk->comp, 1);

	return true;
}
315 
/* Swap @len bytes between the buffers @a and @b.
 *
 * Each argument is evaluated exactly once and is fully parenthesized, so
 * pointer expressions such as "p + 1" are cast as a whole. The helper names
 * are prefixed so they cannot capture a caller variable (e.g. one called "i").
 */
#define swap(a, b, len) do { \
	__u8 *swap_a_ = (__u8 *)(a); \
	__u8 *swap_b_ = (__u8 *)(b); \
	int swap_len_ = (len); \
	for (int swap_i_ = 0; swap_i_ < swap_len_; swap_i_++) { \
		__u8 swap_tmp_ = swap_a_[swap_i_]; \
		swap_a_[swap_i_] = swap_b_[swap_i_]; \
		swap_b_[swap_i_] = swap_tmp_; \
	} \
} while (0)
323 
ping_pong(struct xsk * xsk,void * rx_packet,clockid_t clock_id)324 static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
325 {
326 	struct xsk_tx_metadata *meta;
327 	struct ipv6hdr *ip6h = NULL;
328 	struct iphdr *iph = NULL;
329 	struct xdp_desc *tx_desc;
330 	struct udphdr *udph;
331 	struct ethhdr *eth;
332 	__sum16 want_csum;
333 	void *data;
334 	__u32 idx;
335 	int ret;
336 	int len;
337 
338 	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
339 	if (ret != 1) {
340 		printf("%p: failed to reserve tx slot\n", xsk);
341 		return;
342 	}
343 
344 	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
345 	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
346 	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
347 
348 	meta = data - sizeof(struct xsk_tx_metadata);
349 	memset(meta, 0, sizeof(*meta));
350 	meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
351 
352 	eth = rx_packet;
353 
354 	if (eth->h_proto == htons(ETH_P_IP)) {
355 		iph = (void *)(eth + 1);
356 		udph = (void *)(iph + 1);
357 	} else if (eth->h_proto == htons(ETH_P_IPV6)) {
358 		ip6h = (void *)(eth + 1);
359 		udph = (void *)(ip6h + 1);
360 	} else {
361 		printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto);
362 		xsk_ring_prod__cancel(&xsk->tx, 1);
363 		return;
364 	}
365 
366 	len = ETH_HLEN;
367 	if (ip6h)
368 		len += sizeof(*ip6h) + ntohs(ip6h->payload_len);
369 	if (iph)
370 		len += ntohs(iph->tot_len);
371 
372 	swap(eth->h_dest, eth->h_source, ETH_ALEN);
373 	if (iph)
374 		swap(&iph->saddr, &iph->daddr, 4);
375 	else
376 		swap(&ip6h->saddr, &ip6h->daddr, 16);
377 	swap(&udph->source, &udph->dest, 2);
378 
379 	want_csum = udph->check;
380 	if (ip6h)
381 		udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
382 					       ntohs(udph->len), IPPROTO_UDP, 0);
383 	else
384 		udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
385 						 ntohs(udph->len), IPPROTO_UDP, 0);
386 
387 	meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
388 	if (iph)
389 		meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
390 	else
391 		meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h);
392 	meta->request.csum_offset = offsetof(struct udphdr, check);
393 
394 	printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n",
395 	       xsk, ntohs(udph->check), ntohs(want_csum),
396 	       meta->request.csum_start, meta->request.csum_offset);
397 
398 	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
399 	tx_desc->options |= XDP_TX_METADATA;
400 	tx_desc->len = len;
401 
402 	xsk_ring_prod__submit(&xsk->tx, 1);
403 }
404 
/* Main receive loop: poll all AF_XDP sockets plus the regular UDP socket and
 * print the metadata of everything that arrives.
 *
 * @rx_xsk:    array of @rxq per-queue AF_XDP sockets
 * @rxq:       number of entries in @rx_xsk
 * @server_fd: UDP socket used for the skb path (verify_skb_metadata())
 * @clock_id:  clock used as reference for timestamp deltas
 *
 * The loop only ends when poll() fails (for instance when it is interrupted
 * by a signal); the function then returns 0.
 */
static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
{
	const struct xdp_desc *rx_desc;
	/* One pollfd per AF_XDP socket, plus a final one for server_fd. */
	struct pollfd fds[rxq + 1];
	__u64 comp_addr;
	__u64 addr;
	__u32 idx = 0;
	int ret;
	int i;

	for (i = 0; i < rxq; i++) {
		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
		fds[i].events = POLLIN;
		fds[i].revents = 0;
	}

	fds[rxq].fd = server_fd;
	fds[rxq].events = POLLIN;
	fds[rxq].revents = 0;

	while (true) {
		/* Cleared so the errno printed after poll() is not a stale value. */
		errno = 0;

		for (i = 0; i < rxq; i++) {
			ret = kick_rx(&rx_xsk[i]);
			if (ret)
				printf("kick_rx ret=%d\n", ret);
		}

		/* Wait up to one second, then report the BPF-side counters. */
		ret = poll(fds, rxq + 1, 1000);
		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
		       ret, errno, bpf_obj->bss->pkts_skip,
		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
		if (ret < 0)
			break;
		if (ret == 0)
			continue;

		if (fds[rxq].revents)
			verify_skb_metadata(server_fd);

		for (i = 0; i < rxq; i++) {
			bool first_seg = true;
			bool is_eop = true;

			if (fds[i].revents == 0)
				continue;

			struct xsk *xsk = &rx_xsk[i];
peek:
			/* Re-entered via goto for each further segment of a
			 * multi-buffer packet.
			 */
			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
			printf("xsk_ring_cons__peek: %d\n", ret);
			if (ret != 1)
				continue;

			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
			/* XDP_PKT_CONTD set means more segments follow. */
			is_eop = !(rx_desc->options & XDP_PKT_CONTD);
			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
			       xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
			/* Metadata check and reply happen for the first segment only. */
			if (first_seg) {
				verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
						    clock_id);
				first_seg = false;

				if (!skip_tx) {
					/* mirror first chunk back */
					ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr),
						  clock_id);

					ret = kick_tx(xsk);
					if (ret)
						printf("kick_tx ret=%d\n", ret);

					/* Look for the TX completion up to 500 times,
					 * sleeping 10 usec between attempts.
					 */
					for (int j = 0; j < 500; j++) {
						if (complete_tx(xsk, clock_id))
							break;
						usleep(10);
					}
				}
			}

			/* Done with this descriptor: release it and give the
			 * frame back to the fill ring.
			 */
			xsk_ring_cons__release(&xsk->rx, 1);
			refill_rx(xsk, comp_addr);
			if (!is_eop)
				goto peek;
		}
	}

	return 0;
}
497 
rxq_num(const char * ifname)498 static int rxq_num(const char *ifname)
499 {
500 	struct ethtool_channels ch = {
501 		.cmd = ETHTOOL_GCHANNELS,
502 	};
503 
504 	struct ifreq ifr = {
505 		.ifr_data = (void *)&ch,
506 	};
507 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
508 	int fd, ret;
509 
510 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
511 	if (fd < 0)
512 		error(1, errno, "socket");
513 
514 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
515 	if (ret < 0)
516 		error(1, errno, "ioctl(SIOCETHTOOL)");
517 
518 	close(fd);
519 
520 	return ch.rx_count + ch.combined_count;
521 }
522 
hwtstamp_ioctl(int op,const char * ifname,struct hwtstamp_config * cfg)523 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
524 {
525 	struct ifreq ifr = {
526 		.ifr_data = (void *)cfg,
527 	};
528 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
529 	int fd, ret;
530 
531 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
532 	if (fd < 0)
533 		error(1, errno, "socket");
534 
535 	ret = ioctl(fd, op, &ifr);
536 	if (ret < 0)
537 		error(1, errno, "ioctl(%d)", op);
538 
539 	close(fd);
540 }
541 
/* HW timestamping configuration read before it was changed, and the
 * interface it belongs to; used by hwtstamp_restore() at exit.
 */
static struct hwtstamp_config saved_hwtstamp_cfg;
static const char *saved_hwtstamp_ifname;
544 
hwtstamp_restore(void)545 static void hwtstamp_restore(void)
546 {
547 	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
548 }
549 
hwtstamp_enable(const char * ifname)550 static void hwtstamp_enable(const char *ifname)
551 {
552 	struct hwtstamp_config cfg = {
553 		.rx_filter = HWTSTAMP_FILTER_ALL,
554 	};
555 
556 	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
557 	saved_hwtstamp_ifname = strdup(ifname);
558 	atexit(hwtstamp_restore);
559 
560 	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
561 }
562 
cleanup(void)563 static void cleanup(void)
564 {
565 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
566 	int ret;
567 	int i;
568 
569 	if (bpf_obj) {
570 		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
571 		if (opts.old_prog_fd >= 0) {
572 			printf("detaching bpf program....\n");
573 			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
574 			if (ret)
575 				printf("failed to detach XDP program: %d\n", ret);
576 		}
577 	}
578 
579 	for (i = 0; i < rxq; i++)
580 		close_xsk(&rx_xsk[i]);
581 
582 	if (bpf_obj)
583 		xdp_hw_metadata__destroy(bpf_obj);
584 
585 	free((void *)saved_hwtstamp_ifname);
586 }
587 
/* Empty signal handler: its only job is to make poll() return early. */
static void handle_signal(int sig)
{
	(void)sig;
}
592 
/* Set SO_TIMESTAMPING on @fd to the flag set @val.
 * Exits the program if setsockopt() fails.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
601 
/* Print the command line help to stdout.
 *
 * The blank line separates the option list from the example; it used to sit
 * after -h and split the option list in two.
 */
static void print_usage(void)
{
	const char *usage =
		"Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
		"  -c    Run in copy mode (zerocopy is default)\n"
		"  -h    Display this help and exit\n"
		"  -m    Enable multi-buffer XDP for larger MTU\n"
		"  -r    Don't generate AF_XDP reply (rx metadata only)\n\n"
		"Generate test packets on the other machine with:\n"
		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";

	printf("%s", usage);
}
615 
read_args(int argc,char * argv[])616 static void read_args(int argc, char *argv[])
617 {
618 	int opt;
619 
620 	while ((opt = getopt(argc, argv, "chmr")) != -1) {
621 		switch (opt) {
622 		case 'c':
623 			bind_flags &= ~XDP_USE_NEED_WAKEUP;
624 			bind_flags &= ~XDP_ZEROCOPY;
625 			bind_flags |= XDP_COPY;
626 			break;
627 		case 'h':
628 			print_usage();
629 			exit(0);
630 		case 'm':
631 			bind_flags |= XDP_USE_SG;
632 			break;
633 		case 'r':
634 			skip_tx = true;
635 			break;
636 		case '?':
637 			if (isprint(optopt))
638 				fprintf(stderr, "Unknown option: -%c\n", optopt);
639 			fallthrough;
640 		default:
641 			print_usage();
642 			error(-1, opterr, "Command line options error");
643 		}
644 	}
645 
646 	if (optind >= argc) {
647 		fprintf(stderr, "No device name provided\n");
648 		print_usage();
649 		exit(-1);
650 	}
651 
652 	ifname = argv[optind];
653 	ifindex = if_nametoindex(ifname);
654 
655 	if (!ifname)
656 		error(-1, errno, "Invalid interface name");
657 }
658 
main(int argc,char * argv[])659 int main(int argc, char *argv[])
660 {
661 	clockid_t clock_id = CLOCK_TAI;
662 	int server_fd = -1;
663 	int ret;
664 	int i;
665 
666 	struct bpf_program *prog;
667 
668 	read_args(argc, argv);
669 
670 	rxq = rxq_num(ifname);
671 
672 	printf("rxq: %d\n", rxq);
673 
674 	hwtstamp_enable(ifname);
675 
676 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
677 	if (!rx_xsk)
678 		error(1, ENOMEM, "malloc");
679 
680 	for (i = 0; i < rxq; i++) {
681 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
682 		ret = open_xsk(ifindex, &rx_xsk[i], i);
683 		if (ret)
684 			error(1, -ret, "open_xsk");
685 
686 		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
687 	}
688 
689 	printf("open bpf program...\n");
690 	bpf_obj = xdp_hw_metadata__open();
691 	if (libbpf_get_error(bpf_obj))
692 		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
693 
694 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
695 	bpf_program__set_ifindex(prog, ifindex);
696 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
697 
698 	printf("load bpf program...\n");
699 	ret = xdp_hw_metadata__load(bpf_obj);
700 	if (ret)
701 		error(1, -ret, "xdp_hw_metadata__load");
702 
703 	printf("prepare skb endpoint...\n");
704 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
705 	if (server_fd < 0)
706 		error(1, errno, "start_server");
707 	timestamping_enable(server_fd,
708 			    SOF_TIMESTAMPING_SOFTWARE |
709 			    SOF_TIMESTAMPING_RAW_HARDWARE);
710 
711 	printf("prepare xsk map...\n");
712 	for (i = 0; i < rxq; i++) {
713 		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
714 		__u32 queue_id = i;
715 
716 		printf("map[%d] = %d\n", queue_id, sock_fd);
717 		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
718 		if (ret)
719 			error(1, -ret, "bpf_map_update_elem");
720 	}
721 
722 	printf("attach bpf program...\n");
723 	ret = bpf_xdp_attach(ifindex,
724 			     bpf_program__fd(bpf_obj->progs.rx),
725 			     XDP_FLAGS, NULL);
726 	if (ret)
727 		error(1, -ret, "bpf_xdp_attach");
728 
729 	signal(SIGINT, handle_signal);
730 	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
731 	close(server_fd);
732 	cleanup();
733 	if (ret)
734 		error(1, -ret, "verify_metadata");
735 }
736