// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "xsk.h"
#include "bpf_util.h"

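/* Fallback definitions in case the system headers do not yet know about
 * AF_XDP sockets.
 */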
#ifndef SOL_XDP
 #define SOL_XDP 283
#endif

#ifndef AF_XDP
 #define AF_XDP 44
#endif

#ifndef PF_XDP
 #define PF_XDP AF_XDP
#endif

#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

#define XSKMAP_SIZE 1

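/* A registered UMEM: the packet buffer area shared with the kernel plus its
 * configuration and the socket fd it was registered on. fill_save/comp_save
 * remember the rings created together with the UMEM until the first socket
 * takes them over.
 */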
struct xsk_umem {
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

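/* Reference-counted state shared by all sockets bound to the same UMEM,
 * netdev and queue: the fill and completion rings for that binding.
 */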
struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
};

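/* An AF_XDP socket: its RX and TX rings, configuration and the context it
 * is bound to.
 */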
struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

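/* Return true if the buffer starts on a page boundary. */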
static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

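/* Copy the user-supplied UMEM configuration, or fall back to the library
 * defaults if none was given.
 */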
static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}

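/* Same for the per-socket (RX/TX ring) configuration. */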
static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->bind_flags = 0;
		return 0;
	}

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

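/* Read the mmap offsets of the rings from the kernel via the
 * XDP_MMAP_OFFSETS socket option.
 */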
static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	return -EINVAL;
}

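/* Size the fill and completion rings through setsockopt() and mmap them
 * from the given socket fd.
 */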
static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	/* map has been reused for the failed completion ring mmap, so undo
	 * the fill ring mapping through the pointer stored in fill->ring.
	 */
	munmap(fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	return err;
}

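/* Create an AF_XDP socket, register umem_area of size bytes as its UMEM and
 * map the fill and completion rings. The ring structs are remembered in
 * fill_save/comp_save so that the first socket created on this UMEM can
 * reuse them.
 */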
int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
		     __u64 size, struct xsk_ring_prod *fill,
		     struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}

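/* Return true if an XDP program is attached to ifindex in the given mode
 * (XDP_FLAGS_DRV_MODE or XDP_FLAGS_SKB_MODE).
 */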
bool xsk_is_in_mode(u32 ifindex, int mode)
{
	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
	int ret;

	ret = bpf_xdp_query(ifindex, mode, &opts);
	if (ret) {
		printf("XDP mode query returned error %s\n", strerror(errno));
		return false;
	}

	if (mode == XDP_FLAGS_DRV_MODE)
		return opts.attach_mode == XDP_ATTACHED_DRV;
	else if (mode == XDP_FLAGS_SKB_MODE)
		return opts.attach_mode == XDP_ATTACHED_SKB;

	return false;
}

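/* Thin wrappers around bpf_xdp_attach()/bpf_xdp_detach(). */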
int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
	int prog_fd;

	prog_fd = bpf_program__fd(prog);
	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
}

void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
{
	bpf_xdp_detach(ifindex, xdp_flags, NULL);
}

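/* These helpers manage index 0 of the XSKMAP: clear the entry or point it
 * at the given socket.
 */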
void xsk_clear_xskmap(struct bpf_map *map)
{
	u32 index = 0;
	int map_fd;

	map_fd = bpf_map__fd(map);
	bpf_map_delete_elem(map_fd, &index);
}

int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
{
	int map_fd, sock_fd;
	u32 index = 0;

	map_fd = bpf_map__fd(map);
	sock_fd = xsk_socket__fd(xsk);

	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
}

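/* Find an existing context for (ifindex, queue_id) on this UMEM and take a
 * reference on it, or return NULL if there is none.
 */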
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}

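/* Drop a reference on ctx. On the last put, optionally unmap its fill and
 * completion rings before freeing it.
 */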
static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
	       sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}

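/* Allocate a context for (ifindex, queue_id). If the UMEM's original rings
 * have already been handed out, map new fill/completion rings on this
 * socket; otherwise reuse the rings saved at UMEM creation, copying their
 * state into the caller's structs when they differ.
 */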
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	return ctx;
}

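/* Create an AF_XDP socket on (ifindex, queue_id) on top of a UMEM that may
 * already be bound by other sockets. Sets up the RX/TX rings requested by
 * the caller, maps them and binds the socket.
 */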
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      int ifindex,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

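	/* The first socket reuses the UMEM's fd (and any rings already set
	 * up on it); additional sockets get their own fd and are bound with
	 * XDP_SHARED_UMEM further down.
	 */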
	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

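	/* Look up or create the shared per-(ifindex, queue_id) context. */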
	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;

	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

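	/* Map the descriptor rings that were requested and fill in the ring
	 * bookkeeping.
	 */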
	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is r->size bigger than the real consumer pointer
		 * See xsk_prod_nb_free
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;

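	/* Bind the socket. If the UMEM is already in use by another socket,
	 * attach to it through XDP_SHARED_UMEM; otherwise use the caller's
	 * bind flags.
	 */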
	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}

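/* Convenience wrapper for the common single-socket case: uses the fill and
 * completion rings saved when the UMEM was created.
 */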
int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}

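/* Tear down a UMEM that no socket references any more: unmap its saved
 * rings, close its fd and free it. Returns -EBUSY while sockets still
 * hold it.
 */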
int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

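/* Release a socket: drop its context, unmap its RX/TX rings and close its
 * fd unless the fd is still owned by the UMEM.
 */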
void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}
646