xref: /linux/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c (revision 753c8608f3e579307493a63b9242667aee35a751)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <test_progs.h>
3 #include <network_helpers.h>
4 #include "xdp_metadata.skel.h"
5 #include "xdp_metadata2.skel.h"
6 #include "xdp_metadata.h"
7 #include "xsk.h"
8 
9 #include <bpf/btf.h>
10 #include <linux/errqueue.h>
11 #include <linux/if_link.h>
12 #include <linux/net_tstamp.h>
13 #include <linux/udp.h>
14 #include <sys/mman.h>
15 #include <net/if.h>
16 #include <poll.h>
17 
/* Names of the two ends of the veth pair created by the test. */
#define TX_NAME "veTX"
#define RX_NAME "veRX"

/* Number of payload bytes that follow the UDP header in generated packets. */
#define UDP_PAYLOAD_BYTES 4

/* UDP source port of generated packets, and the destination port the test
 * sends them to.
 */
#define AF_XDP_SOURCE_PORT 1234
#define AF_XDP_CONSUMER_PORT 8080

/* UMEM geometry: frame count, size of one frame, total size of the mapping. */
#define UMEM_NUM 16
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags handed to bpf_xdp_attach() and the queue the AF_XDP sockets bind to. */
#define XDP_FLAGS XDP_FLAGS_DRV_MODE
#define QUEUE_ID 0

/* IPv4 addresses assigned to the veth pair and used in generated packets. */
#define TX_ADDR "10.0.0.1"
#define RX_ADDR "10.0.0.2"
#define PREFIX_LEN "8"
#define FAMILY AF_INET
36 
/* State of one AF_XDP socket used by the test: the UMEM backing memory, the
 * UMEM handle with its fill/completion rings, and the socket with its RX/TX
 * rings. Filled in by open_xsk() and torn down by close_xsk().
 */
struct xsk {
	void *umem_area;		/* mmap()ed region of UMEM_SIZE bytes */
	struct xsk_umem *umem;		/* set by xsk_umem__create() */
	struct xsk_ring_prod fill;	/* fill ring, passed to xsk_umem__create() */
	struct xsk_ring_cons comp;	/* completion ring, passed to xsk_umem__create() */
	struct xsk_ring_prod tx;	/* TX ring, passed to xsk_socket__create() */
	struct xsk_ring_cons rx;	/* RX ring, passed to xsk_socket__create() */
	struct xsk_socket *socket;	/* set by xsk_socket__create() */
};
46 
47 static int open_xsk(int ifindex, struct xsk *xsk)
48 {
49 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
50 	const struct xsk_socket_config socket_config = {
51 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
52 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
53 		.bind_flags = XDP_COPY,
54 	};
55 	const struct xsk_umem_config umem_config = {
56 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
57 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
58 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
59 		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM,
60 		.tx_metadata_len = sizeof(struct xsk_tx_metadata),
61 	};
62 	__u32 idx;
63 	u64 addr;
64 	int ret;
65 	int i;
66 
67 	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
68 	if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
69 		return -1;
70 
71 	ret = xsk_umem__create(&xsk->umem,
72 			       xsk->umem_area, UMEM_SIZE,
73 			       &xsk->fill,
74 			       &xsk->comp,
75 			       &umem_config);
76 	if (!ASSERT_OK(ret, "xsk_umem__create"))
77 		return ret;
78 
79 	ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
80 				 xsk->umem,
81 				 &xsk->rx,
82 				 &xsk->tx,
83 				 &socket_config);
84 	if (!ASSERT_OK(ret, "xsk_socket__create"))
85 		return ret;
86 
87 	/* First half of umem is for TX. This way address matches 1-to-1
88 	 * to the completion queue index.
89 	 */
90 
91 	for (i = 0; i < UMEM_NUM / 2; i++) {
92 		addr = i * UMEM_FRAME_SIZE;
93 		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
94 	}
95 
96 	/* Second half of umem is for RX. */
97 
98 	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
99 	if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
100 		return ret;
101 	if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
102 		return -1;
103 
104 	for (i = 0; i < UMEM_NUM / 2; i++) {
105 		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
106 		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
107 		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
108 	}
109 	xsk_ring_prod__submit(&xsk->fill, ret);
110 
111 	return 0;
112 }
113 
114 static void close_xsk(struct xsk *xsk)
115 {
116 	if (xsk->umem)
117 		xsk_umem__delete(xsk->umem);
118 	if (xsk->socket)
119 		xsk_socket__delete(xsk->socket);
120 	munmap(xsk->umem_area, UMEM_SIZE);
121 }
122 
/* Compute the IPv4 header checksum of @iph in place.
 *
 * Only the fixed sizeof(struct iphdr) bytes are summed. The check field is
 * zeroed first, the header is added up as 16-bit words, the carries are
 * folded back into the low 16 bits and the complement is stored in
 * iph->check.
 */
static void ip_csum(struct iphdr *iph)
{
	const __u16 *word = (void *)iph;
	const __u16 *end = word + sizeof(*iph) / sizeof(*word);
	__u32 acc = 0;

	/* The checksum field itself must not contribute to the sum. */
	iph->check = 0;

	while (word < end)
		acc += *word++;

	/* End-around carry: fold everything above bit 15 back in. */
	for (; acc >> 16; )
		acc = (acc >> 16) + (acc & 0xffff);

	iph->check = ~acc;
}
139 
140 static int generate_packet(struct xsk *xsk, __u16 dst_port)
141 {
142 	struct xsk_tx_metadata *meta;
143 	struct xdp_desc *tx_desc;
144 	struct udphdr *udph;
145 	struct ethhdr *eth;
146 	struct iphdr *iph;
147 	void *data;
148 	__u32 idx;
149 	int ret;
150 
151 	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
152 	if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
153 		return -1;
154 
155 	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
156 	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
157 	printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
158 	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
159 
160 	meta = data - sizeof(struct xsk_tx_metadata);
161 	memset(meta, 0, sizeof(*meta));
162 	meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
163 
164 	eth = data;
165 	iph = (void *)(eth + 1);
166 	udph = (void *)(iph + 1);
167 
168 	memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
169 	memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
170 	eth->h_proto = htons(ETH_P_IP);
171 
172 	iph->version = 0x4;
173 	iph->ihl = 0x5;
174 	iph->tos = 0x9;
175 	iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
176 	iph->id = 0;
177 	iph->frag_off = 0;
178 	iph->ttl = 0;
179 	iph->protocol = IPPROTO_UDP;
180 	ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
181 	ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
182 	ip_csum(iph);
183 
184 	udph->source = htons(AF_XDP_SOURCE_PORT);
185 	udph->dest = htons(dst_port);
186 	udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
187 	udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
188 					 ntohs(udph->len), IPPROTO_UDP, 0);
189 
190 	memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
191 
192 	meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
193 	meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
194 	meta->request.csum_offset = offsetof(struct udphdr, check);
195 
196 	tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
197 	tx_desc->options |= XDP_TX_METADATA;
198 	xsk_ring_prod__submit(&xsk->tx, 1);
199 
200 	ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
201 	if (!ASSERT_GE(ret, 0, "sendto"))
202 		return ret;
203 
204 	return 0;
205 }
206 
207 static void complete_tx(struct xsk *xsk)
208 {
209 	struct xsk_tx_metadata *meta;
210 	__u64 addr;
211 	void *data;
212 	__u32 idx;
213 
214 	if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
215 		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
216 
217 		printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
218 
219 		data = xsk_umem__get_data(xsk->umem_area, addr);
220 		meta = data - sizeof(struct xsk_tx_metadata);
221 
222 		ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp");
223 
224 		xsk_ring_cons__release(&xsk->comp, 1);
225 	}
226 }
227 
228 static void refill_rx(struct xsk *xsk, __u64 addr)
229 {
230 	__u32 idx;
231 
232 	if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
233 		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
234 		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
235 		xsk_ring_prod__submit(&xsk->fill, 1);
236 	}
237 }
238 
239 static int verify_xsk_metadata(struct xsk *xsk)
240 {
241 	const struct xdp_desc *rx_desc;
242 	struct pollfd fds = {};
243 	struct xdp_meta *meta;
244 	struct udphdr *udph;
245 	struct ethhdr *eth;
246 	struct iphdr *iph;
247 	__u64 comp_addr;
248 	void *data;
249 	__u64 addr;
250 	__u32 idx = 0;
251 	int ret;
252 
253 	ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
254 	if (!ASSERT_EQ(ret, 0, "recvfrom"))
255 		return -1;
256 
257 	fds.fd = xsk_socket__fd(xsk->socket);
258 	fds.events = POLLIN;
259 
260 	ret = poll(&fds, 1, 1000);
261 	if (!ASSERT_GT(ret, 0, "poll"))
262 		return -1;
263 
264 	ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
265 	if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
266 		return -2;
267 
268 	rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
269 	comp_addr = xsk_umem__extract_addr(rx_desc->addr);
270 	addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
271 	printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
272 	       xsk, idx, rx_desc->addr, addr, comp_addr);
273 	data = xsk_umem__get_data(xsk->umem_area, addr);
274 
275 	/* Make sure we got the packet offset correctly. */
276 
277 	eth = data;
278 	ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
279 	iph = (void *)(eth + 1);
280 	ASSERT_EQ((int)iph->version, 4, "iph->version");
281 	udph = (void *)(iph + 1);
282 
283 	/* custom metadata */
284 
285 	meta = data - sizeof(struct xdp_meta);
286 
287 	if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
288 		return -1;
289 
290 	if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
291 		return -1;
292 
293 	ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
294 
295 	/* checksum offload */
296 	ASSERT_EQ(udph->check, htons(0x721c), "csum");
297 
298 	xsk_ring_cons__release(&xsk->rx, 1);
299 	refill_rx(xsk, comp_addr);
300 
301 	return 0;
302 }
303 
304 void test_xdp_metadata(void)
305 {
306 	struct xdp_metadata2 *bpf_obj2 = NULL;
307 	struct xdp_metadata *bpf_obj = NULL;
308 	struct bpf_program *new_prog, *prog;
309 	struct nstoken *tok = NULL;
310 	__u32 queue_id = QUEUE_ID;
311 	struct bpf_map *prog_arr;
312 	struct xsk tx_xsk = {};
313 	struct xsk rx_xsk = {};
314 	__u32 val, key = 0;
315 	int retries = 10;
316 	int rx_ifindex;
317 	int tx_ifindex;
318 	int sock_fd;
319 	int ret;
320 
321 	/* Setup new networking namespace, with a veth pair. */
322 
323 	SYS(out, "ip netns add xdp_metadata");
324 	tok = open_netns("xdp_metadata");
325 	SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
326 	    " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
327 	SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
328 	SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
329 	SYS(out, "ip link set dev " TX_NAME " up");
330 	SYS(out, "ip link set dev " RX_NAME " up");
331 	SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
332 	SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
333 
334 	rx_ifindex = if_nametoindex(RX_NAME);
335 	tx_ifindex = if_nametoindex(TX_NAME);
336 
337 	/* Setup separate AF_XDP for TX and RX interfaces. */
338 
339 	ret = open_xsk(tx_ifindex, &tx_xsk);
340 	if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
341 		goto out;
342 
343 	ret = open_xsk(rx_ifindex, &rx_xsk);
344 	if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
345 		goto out;
346 
347 	bpf_obj = xdp_metadata__open();
348 	if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
349 		goto out;
350 
351 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
352 	bpf_program__set_ifindex(prog, rx_ifindex);
353 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
354 
355 	if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
356 		goto out;
357 
358 	/* Make sure we can't add dev-bound programs to prog maps. */
359 	prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
360 	if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
361 		goto out;
362 
363 	val = bpf_program__fd(prog);
364 	if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
365 					     &val, sizeof(val), BPF_ANY),
366 			"update prog_arr"))
367 		goto out;
368 
369 	/* Attach BPF program to RX interface. */
370 
371 	ret = bpf_xdp_attach(rx_ifindex,
372 			     bpf_program__fd(bpf_obj->progs.rx),
373 			     XDP_FLAGS, NULL);
374 	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
375 		goto out;
376 
377 	sock_fd = xsk_socket__fd(rx_xsk.socket);
378 	ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
379 	if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
380 		goto out;
381 
382 	/* Send packet destined to RX AF_XDP socket. */
383 	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
384 		       "generate AF_XDP_CONSUMER_PORT"))
385 		goto out;
386 
387 	/* Verify AF_XDP RX packet has proper metadata. */
388 	if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
389 		       "verify_xsk_metadata"))
390 		goto out;
391 
392 	complete_tx(&tx_xsk);
393 
394 	/* Make sure freplace correctly picks up original bound device
395 	 * and doesn't crash.
396 	 */
397 
398 	bpf_obj2 = xdp_metadata2__open();
399 	if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
400 		goto out;
401 
402 	new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
403 	bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
404 
405 	if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
406 		goto out;
407 
408 	if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
409 		goto out;
410 
411 	/* Send packet to trigger . */
412 	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
413 		       "generate freplace packet"))
414 		goto out;
415 
416 	while (!retries--) {
417 		if (bpf_obj2->bss->called)
418 			break;
419 		usleep(10);
420 	}
421 	ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
422 
423 out:
424 	close_xsk(&rx_xsk);
425 	close_xsk(&tx_xsk);
426 	xdp_metadata2__destroy(bpf_obj2);
427 	xdp_metadata__destroy(bpf_obj);
428 	if (tok)
429 		close_netns(tok);
430 	SYS_NOFAIL("ip netns del xdp_metadata");
431 }
432