/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2025 Intel Corporation */

#ifndef __LIBETH_XDP_H
#define __LIBETH_XDP_H

#include <linux/bpf_trace.h>
#include <linux/unroll.h>

#include <net/libeth/rx.h>
#include <net/libeth/tx.h>
#include <net/xsk_buff_pool.h>

/*
 * Defined as bits to be able to use them as a mask on Rx.
 * Also used as internal return values on Tx.
 */
enum {
        LIBETH_XDP_PASS                 = 0U,
        LIBETH_XDP_DROP                 = BIT(0),
        LIBETH_XDP_ABORTED              = BIT(1),
        LIBETH_XDP_TX                   = BIT(2),
};

/*
 * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets casted to,
 * pick maximum pointer-compatible alignment.
 */
#define __LIBETH_XDP_BUFF_ALIGN                                               \
        (IS_ALIGNED(sizeof(struct xdp_buff_xsk), 16) ? 16 :                   \
         IS_ALIGNED(sizeof(struct xdp_buff_xsk), 8) ? 8 :                     \
         sizeof(long))

/**
 * struct libeth_xdp_buff - libeth extension over &xdp_buff
 * @base: main &xdp_buff
 * @data: shortcut for @base.data
 * @desc: RQ descriptor containing metadata for this buffer
 * @priv: driver-private scratchspace
 *
 * The main reason for this is to have a pointer to the descriptor to be able
 * to quickly get frame metadata from xdpmo and driver buff-to-xdp callbacks
 * (as well as bigger alignment).
 * Pointer/layout-compatible with &xdp_buff and &xdp_buff_xsk.
 */
struct libeth_xdp_buff {
        union {
                struct xdp_buff         base;
                void                    *data;
        };

        const void                      *desc;
        unsigned long                   priv[]
                                        __aligned(__LIBETH_XDP_BUFF_ALIGN);
} __aligned(__LIBETH_XDP_BUFF_ALIGN);
static_assert(offsetof(struct libeth_xdp_buff, data) ==
              offsetof(struct xdp_buff_xsk, xdp.data));
static_assert(offsetof(struct libeth_xdp_buff, desc) ==
              offsetof(struct xdp_buff_xsk, cb));
static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk),
                         __alignof(struct libeth_xdp_buff)));

/* Common Tx bits */

/**
 * enum - libeth_xdp internal Tx flags
 * @LIBETH_XDP_TX_BULK: bulk size at which the queued frames are flushed to
 *                      the queue
 * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled
 * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent
 * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit()
 */
enum {
        LIBETH_XDP_TX_BULK              = DEV_MAP_BULK_SIZE,
        LIBETH_XDP_TX_BATCH             = 8,

        LIBETH_XDP_TX_DROP              = BIT(0),
        LIBETH_XDP_TX_NDO               = BIT(1),
};

/**
 * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags
 * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length
 * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame
 * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame
 * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags
 * @LIBETH_XDP_TX_FLAGS: only for ``XDP_TX``, [31:16] of ::len_fl is flags
 */
enum {
        LIBETH_XDP_TX_LEN               = GENMASK(15, 0),

        LIBETH_XDP_TX_FIRST             = BIT(16),
        LIBETH_XDP_TX_LAST              = BIT(17),
        LIBETH_XDP_TX_MULTI             = BIT(18),

        LIBETH_XDP_TX_FLAGS             = GENMASK(31, 16),
};
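
/*
 * Packing sketch (illustrative values only, not taken from any driver): for
 * ``XDP_TX``, the length and the flags above share one u32, ::len_fl, so
 * that both can be read or written with a single access:
 *
 *	len_fl = 1518 | LIBETH_XDP_TX_FIRST | LIBETH_XDP_TX_LAST;
 *	len_fl & LIBETH_XDP_TX_LEN;	-> 1518
 *	len_fl & LIBETH_XDP_TX_FLAGS;	-> FIRST | LAST
 */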

/**
 * struct libeth_xdp_tx_frame - represents one XDP Tx element
 * @data: frame start pointer for ``XDP_TX``
 * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed
 * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info
 * @frag: one (non-head) frag for ``XDP_TX``
 * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit()
 * @dma: DMA address of the non-head frag for .ndo_xdp_xmit()
 * @len: frag length for .ndo_xdp_xmit()
 * @flags: Tx flags for the above
 * @opts: combined @len + @flags for the above for speed
 */
struct libeth_xdp_tx_frame {
        union {
                /* ``XDP_TX`` */
                struct {
                        void                    *data;
                        u32                     len_fl;
                        u32                     soff;
                };

                /* ``XDP_TX`` frag */
                skb_frag_t                      frag;

                /* .ndo_xdp_xmit() */
                struct {
                        union {
                                struct xdp_frame        *xdpf;
                                dma_addr_t              dma;
                        };
                        union {
                                struct {
                                        u32             len;
                                        u32             flags;
                                };
                                aligned_u64             opts;
                        };
                };
        };
} __aligned_largest;
static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) ==
              offsetof(struct libeth_xdp_tx_frame, len_fl));

/**
 * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending
 * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit()
 * @dev: &net_device which the frames are transmitted on
 * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure
 * @count: current number of frames in @bulk
 * @bulk: array of queued frames for bulk Tx
 *
 * All XDP Tx operations queue each frame to the bulk first and flush it
 * when @count reaches the array end. Bulk is always placed on the stack
 * for performance. One bulk element contains all the data necessary
 * for sending a frame and then freeing it on completion.
 */
struct libeth_xdp_tx_bulk {
        const struct bpf_prog           *prog;
        struct net_device               *dev;
        void                            *xdpsq;

        u32                             count;
        struct libeth_xdp_tx_frame      bulk[LIBETH_XDP_TX_BULK];
} __aligned(sizeof(struct libeth_xdp_tx_frame));

/**
 * LIBETH_XDP_ONSTACK_BULK - declare &libeth_xdp_tx_bulk on the stack
 * @bq: name of the variable to declare
 *
 * Helper to declare a bulk on the stack with a compiler hint that it should
 * not be initialized automatically (with ``CONFIG_INIT_STACK_ALL_*``) for
 * performance reasons.
 */
#define LIBETH_XDP_ONSTACK_BULK(bq)                                           \
        struct libeth_xdp_tx_bulk bq __uninitialized

/**
 * struct libeth_xdpsq - abstraction for an XDPSQ
 * @sqes: array of Tx buffers from the actual queue struct
 * @descs: opaque pointer to the HW descriptor array
 * @ntu: pointer to the next free descriptor index
 * @count: number of descriptors on that queue
 * @pending: pointer to the number of sent-not-completed descs on that queue
 * @xdp_tx: optional pointer to the counter of not-yet-completed XDP
 *          descriptors, may be %NULL
 *
 * Abstraction for driver-independent implementation of Tx. Placed on the stack
 * and filled by the driver before the transmission, so that the generic
 * functions can access and modify driver-specific resources.
 */
struct libeth_xdpsq {
        struct libeth_sqe       *sqes;
        void                    *descs;

        u32                     *ntu;
        u32                     count;

        u32                     *pending;
        u32                     *xdp_tx;
};
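
/*
 * A minimal sketch of the driver-side prep() callback used by the senders
 * below, assuming a hypothetical driver queue struct &xx_xdpsq with matching
 * fields. It fills the onstack abstraction and returns the free descriptor
 * count (a real callback would typically also reclaim completed descriptors
 * first):
 *
 *	static u32 xx_xdpsq_prep(void *xdpsq, struct libeth_xdpsq *sq)
 *	{
 *		struct xx_xdpsq *xsq = xdpsq;
 *
 *		*sq = (struct libeth_xdpsq){
 *			.sqes		= xsq->sqes,
 *			.descs		= xsq->desc_ring,
 *			.ntu		= &xsq->next_to_use,
 *			.count		= xsq->desc_count,
 *			.pending	= &xsq->pending,
 *			.xdp_tx		= &xsq->xdp_tx,
 *		};
 *
 *		return xsq->desc_count - xsq->pending;
 *	}
 */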

/**
 * struct libeth_xdp_tx_desc - abstraction for an XDP Tx descriptor
 * @addr: DMA address of the frame
 * @len: length of the frame
 * @flags: XDP Tx flags
 * @opts: combined @len + @flags for speed
 *
 * Filled by the generic functions and then passed to driver-specific functions
 * to fill a HW Tx descriptor, always placed on the [function] stack.
 */
struct libeth_xdp_tx_desc {
        dma_addr_t                      addr;
        union {
                struct {
                        u32             len;
                        u32             flags;
                };
                aligned_u64             opts;
        };
} __aligned_largest;

/**
 * libeth_xdp_tx_xmit_bulk - main XDP Tx function
 * @bulk: array of frames to send
 * @xdpsq: pointer to the driver-specific XDPSQ struct
 * @n: number of frames to send
 * @unroll: whether to unroll the queue filling loop for speed
 * @priv: driver-specific private data
 * @prep: callback for cleaning the queue and filling abstract &libeth_xdpsq
 * @fill: internal callback for filling &libeth_sqe and &libeth_xdp_tx_desc
 * @xmit: callback for filling a HW descriptor with the frame info
 *
 * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for
 * all types of frames.
 * @unroll greatly increases the object code size, but also greatly increases
 * performance.
 * The compilers inline all those onstack abstractions into direct data
 * accesses.
 *
 * Return: number of frames actually placed on the queue, <= @n. The function
 * can't fail, but can send fewer frames if there are not enough free
 * descriptors available. The actual free space is returned by @prep from the
 * driver.
 */
static __always_inline u32
libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq,
                        u32 n, bool unroll, u64 priv,
                        u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
                        struct libeth_xdp_tx_desc
                        (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
                                const struct libeth_xdpsq *sq, u64 priv),
                        void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
                                     const struct libeth_xdpsq *sq, u64 priv))
{
        struct libeth_xdpsq sq __uninitialized;
        u32 this, batched, off = 0;
        u32 ntu, i = 0;

        n = min(n, prep(xdpsq, &sq));
        if (unlikely(!n))
                return 0;

        ntu = *sq.ntu;

        this = sq.count - ntu;
        if (likely(this > n))
                this = n;

again:
        if (!unroll)
                goto linear;

        batched = ALIGN_DOWN(this, LIBETH_XDP_TX_BATCH);

        for ( ; i < off + batched; i += LIBETH_XDP_TX_BATCH) {
                u32 base = ntu + i - off;

                unrolled_count(LIBETH_XDP_TX_BATCH)
                for (u32 j = 0; j < LIBETH_XDP_TX_BATCH; j++)
                        xmit(fill(bulk[i + j], base + j, &sq, priv),
                             base + j, &sq, priv);
        }

        if (batched < this) {
linear:
                for ( ; i < off + this; i++)
                        xmit(fill(bulk[i], ntu + i - off, &sq, priv),
                             ntu + i - off, &sq, priv);
        }

        ntu += this;
        if (likely(ntu < sq.count))
                goto out;

        ntu = 0;

        if (i < n) {
                this = n - i;
                off = i;

                goto again;
        }

out:
        *sq.ntu = ntu;
        *sq.pending += n;
        if (sq.xdp_tx)
                *sq.xdp_tx += n;

        return n;
}
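
/*
 * A matching sketch of the driver-side xmit() callback, assuming a
 * hypothetical HW Tx descriptor layout (&xx_tx_desc and XX_DESC_CMD_EOP are
 * made up for illustration); it only has to translate &libeth_xdp_tx_desc
 * into one HW descriptor write:
 *
 *	static void xx_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i,
 *				 const struct libeth_xdpsq *sq, u64 priv)
 *	{
 *		struct xx_tx_desc *hw = &((struct xx_tx_desc *)sq->descs)[i];
 *
 *		hw->addr = cpu_to_le64(desc.addr);
 *		hw->size = cpu_to_le16(desc.len);
 *		hw->cmd = (desc.flags & LIBETH_XDP_TX_LAST) ?
 *			  cpu_to_le16(XX_DESC_CMD_EOP) : 0;
 *	}
 */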

/* ``XDP_TX`` bulking */

void libeth_xdp_return_buff_slow(struct libeth_xdp_buff *xdp);

/**
 * libeth_xdp_tx_queue_head - internal helper for queueing one ``XDP_TX`` head
 * @bq: XDP Tx bulk to queue the head frag to
 * @xdp: XDP buffer with the head to queue
 *
 * Return: false if it's the only frag of the frame, true if it's an S/G frame.
 */
static inline bool libeth_xdp_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
                                            const struct libeth_xdp_buff *xdp)
{
        const struct xdp_buff *base = &xdp->base;

        bq->bulk[bq->count++] = (typeof(*bq->bulk)){
                .data   = xdp->data,
                .len_fl = (base->data_end - xdp->data) | LIBETH_XDP_TX_FIRST,
                .soff   = xdp_data_hard_end(base) - xdp->data,
        };

        if (!xdp_buff_has_frags(base))
                return false;

        bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_MULTI;

        return true;
}

/**
 * libeth_xdp_tx_queue_frag - internal helper for queueing one ``XDP_TX`` frag
 * @bq: XDP Tx bulk to queue the frag to
 * @frag: frag to queue
 */
static inline void libeth_xdp_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
                                            const skb_frag_t *frag)
{
        bq->bulk[bq->count++].frag = *frag;
}

/**
 * libeth_xdp_tx_queue_bulk - internal helper for queueing one ``XDP_TX`` frame
 * @bq: XDP Tx bulk to queue the frame to
 * @xdp: XDP buffer to queue
 * @flush_bulk: driver callback to flush the bulk to the HW queue
 *
 * Return: true on success, false on flush error.
 */
static __always_inline bool
libeth_xdp_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq,
                         struct libeth_xdp_buff *xdp,
                         bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
                                            u32 flags))
{
        const struct skb_shared_info *sinfo;
        bool ret = true;
        u32 nr_frags;

        if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
            unlikely(!flush_bulk(bq, 0))) {
                libeth_xdp_return_buff_slow(xdp);
                return false;
        }

        if (!libeth_xdp_tx_queue_head(bq, xdp))
                goto out;

        sinfo = xdp_get_shared_info_from_buff(&xdp->base);
        nr_frags = sinfo->nr_frags;

        for (u32 i = 0; i < nr_frags; i++) {
                if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
                    unlikely(!flush_bulk(bq, 0))) {
                        ret = false;
                        break;
                }

                libeth_xdp_tx_queue_frag(bq, &sinfo->frags[i]);
        }

out:
        bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_LAST;
        xdp->data = NULL;

        return ret;
}
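
/*
 * Usage sketch (hypothetical driver code; xx_xdp_flush_tx would be defined
 * via libeth_xdp_tx_flush_bulk() further below): one way a verdict handler
 * might queue an ``XDP_TX`` frame. On flush error the helper has already
 * freed the buffer, so the caller only has to account the drop, however it
 * chooses to report it:
 *
 *	if (unlikely(!libeth_xdp_tx_queue_bulk(bq, xdp, xx_xdp_flush_tx)))
 *		return LIBETH_XDP_DROP;
 *
 *	return LIBETH_XDP_TX;
 */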

/**
 * libeth_xdp_tx_fill_stats - fill &libeth_sqe with ``XDP_TX`` frame stats
 * @sqe: SQ element to fill
 * @desc: libeth_xdp Tx descriptor
 * @sinfo: &skb_shared_info for this frame
 *
 * Internal helper for filling an SQE with the frame stats, do not use in
 * drivers. Fills the number of frags and bytes for this frame.
 */
#define libeth_xdp_tx_fill_stats(sqe, desc, sinfo)                            \
        __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, __UNIQUE_ID(sqe_),       \
                                   __UNIQUE_ID(desc_), __UNIQUE_ID(sinfo_))

#define __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, ue, ud, us) do {         \
        const struct libeth_xdp_tx_desc *ud = (desc);                         \
        const struct skb_shared_info *us;                                     \
        struct libeth_sqe *ue = (sqe);                                        \
                                                                              \
        ue->nr_frags = 1;                                                     \
        ue->bytes = ud->len;                                                  \
                                                                              \
        if (ud->flags & LIBETH_XDP_TX_MULTI) {                                \
                us = (sinfo);                                                 \
                ue->nr_frags += us->nr_frags;                                 \
                ue->bytes += us->xdp_frags_size;                              \
        }                                                                     \
} while (0)

/**
 * libeth_xdp_tx_fill_buf - internal helper to fill one ``XDP_TX`` &libeth_sqe
 * @frm: XDP Tx frame from the bulk
 * @i: index on the HW queue
 * @sq: XDPSQ abstraction for the queue
 * @priv: private data
 *
 * Return: XDP Tx descriptor with the synced DMA and other info to pass to
 * the driver callback.
 */
static inline struct libeth_xdp_tx_desc
libeth_xdp_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
                       const struct libeth_xdpsq *sq, u64 priv)
{
        struct libeth_xdp_tx_desc desc;
        struct skb_shared_info *sinfo;
        skb_frag_t *frag = &frm.frag;
        struct libeth_sqe *sqe;
        netmem_ref netmem;

        if (frm.len_fl & LIBETH_XDP_TX_FIRST) {
                sinfo = frm.data + frm.soff;
                skb_frag_fill_netmem_desc(frag, virt_to_netmem(frm.data),
                                          offset_in_page(frm.data),
                                          frm.len_fl);
        } else {
                sinfo = NULL;
        }

        netmem = skb_frag_netmem(frag);
        desc = (typeof(desc)){
                .addr   = page_pool_get_dma_addr_netmem(netmem) +
                          skb_frag_off(frag),
                .len    = skb_frag_size(frag) & LIBETH_XDP_TX_LEN,
                .flags  = skb_frag_size(frag) & LIBETH_XDP_TX_FLAGS,
        };

        dma_sync_single_for_device(__netmem_get_pp(netmem)->p.dev, desc.addr,
                                   desc.len, DMA_BIDIRECTIONAL);

        if (!sinfo)
                return desc;

        sqe = &sq->sqes[i];
        sqe->type = LIBETH_SQE_XDP_TX;
        sqe->sinfo = sinfo;
        libeth_xdp_tx_fill_stats(sqe, &desc, sinfo);

        return desc;
}

void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent,
                             u32 flags);

/**
 * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk
 * @bq: bulk to flush
 * @flags: XDP Tx flags (.ndo_xdp_xmit() etc.)
 * @prep: driver-specific callback to prepare the queue for sending
 * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc
 * @xmit: driver callback to fill a HW descriptor
 *
 * Internal abstraction to create bulk flush functions for drivers.
 *
 * Return: true if anything was sent, false otherwise.
 */
static __always_inline bool
__libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags,
                           u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
                           struct libeth_xdp_tx_desc
                           (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
                                   const struct libeth_xdpsq *sq, u64 priv),
                           void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
                                        const struct libeth_xdpsq *sq,
                                        u64 priv))
{
        u32 sent, drops;
        int err = 0;

        sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq,
                                       min(bq->count, LIBETH_XDP_TX_BULK),
                                       false, 0, prep, fill, xmit);
        drops = bq->count - sent;

        if (unlikely(drops)) {
                libeth_xdp_tx_exception(bq, sent, flags);
                err = -ENXIO;
        } else {
                bq->count = 0;
        }

        trace_xdp_bulk_tx(bq->dev, sent, drops, err);

        return likely(sent);
}

/**
 * libeth_xdp_tx_flush_bulk - wrapper to define flush of one ``XDP_TX`` bulk
 * @bq: bulk to flush
 * @flags: Tx flags, see above
 * @prep: driver callback to prepare the queue
 * @xmit: driver callback to fill a HW descriptor
 */
#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit)                       \
        __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf,   \
                                   xmit)
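
/*
 * A minimal sketch of a driver ``XDP_TX`` flush callback built on top of the
 * wrapper above, reusing the hypothetical xx_xdpsq_prep()/xx_xmit_desc()
 * helpers from the earlier sketches:
 *
 *	static bool xx_xdp_flush_tx(struct libeth_xdp_tx_bulk *bq, u32 flags)
 *	{
 *		return libeth_xdp_tx_flush_bulk(bq, flags, xx_xdpsq_prep,
 *						xx_xmit_desc);
 *	}
 */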

/* .ndo_xdp_xmit() implementation */

/**
 * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame
 * @xf: pointer to the XDP frame
 *
 * There's no place in &libeth_xdp_tx_frame to store the DMA address of an
 * &xdp_frame head, so the frame headroom is used instead: the address is
 * placed right after the frame struct, naturally aligned.
 *
 * Return: pointer to the DMA address to use.
 */
#define libeth_xdp_xmit_frame_dma(xf)                                         \
        _Generic((xf),                                                        \
                 const struct xdp_frame *:                                    \
                        (const dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf),  \
                 struct xdp_frame *:                                          \
                        (dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf)         \
        )

static inline void *__libeth_xdp_xmit_frame_dma(const struct xdp_frame *xdpf)
{
        void *addr = (void *)(xdpf + 1);

        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
            __alignof(*xdpf) < sizeof(dma_addr_t))
                addr = PTR_ALIGN(addr, sizeof(dma_addr_t));

        return addr;
}
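
/*
 * The resulting store/load pattern, as used by the queueing and filling
 * helpers below:
 *
 *	*libeth_xdp_xmit_frame_dma(xdpf) = dma;		(when mapping)
 *	dma = *libeth_xdp_xmit_frame_dma(xdpf);		(when filling the SQE)
 */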

/**
 * libeth_xdp_xmit_queue_head - internal helper for queueing one XDP xmit head
 * @bq: XDP Tx bulk to queue the head frag to
 * @xdpf: XDP frame with the head to queue
 * @dev: device to perform DMA mapping
 *
 * Return: ``LIBETH_XDP_DROP`` on DMA mapping error,
 *         ``LIBETH_XDP_PASS`` if it's the only frag in the frame,
 *         ``LIBETH_XDP_TX`` if it's an S/G frame.
 */
static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq,
                                             struct xdp_frame *xdpf,
                                             struct device *dev)
{
        dma_addr_t dma;

        dma = dma_map_single(dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, dma))
                return LIBETH_XDP_DROP;

        *libeth_xdp_xmit_frame_dma(xdpf) = dma;

        bq->bulk[bq->count++] = (typeof(*bq->bulk)){
                .xdpf   = xdpf,
                .len    = xdpf->len,
                .flags  = LIBETH_XDP_TX_FIRST,
        };

        if (!xdp_frame_has_frags(xdpf))
                return LIBETH_XDP_PASS;

        bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI;

        return LIBETH_XDP_TX;
}

/**
 * libeth_xdp_xmit_queue_frag - internal helper for queueing one XDP xmit frag
 * @bq: XDP Tx bulk to queue the frag to
 * @frag: frag to queue
 * @dev: device to perform DMA mapping
 *
 * Return: true on success, false on DMA mapping error.
 */
static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq,
                                              const skb_frag_t *frag,
                                              struct device *dev)
{
        dma_addr_t dma;

        dma = skb_frag_dma_map(dev, frag);
        if (dma_mapping_error(dev, dma))
                return false;

        bq->bulk[bq->count++] = (typeof(*bq->bulk)){
                .dma    = dma,
                .len    = skb_frag_size(frag),
        };

        return true;
}

/**
 * libeth_xdp_xmit_queue_bulk - internal helper for queueing one XDP xmit frame
 * @bq: XDP Tx bulk to queue the frame to
 * @xdpf: XDP frame to queue
 * @flush_bulk: driver callback to flush the bulk to the HW queue
 *
 * Return: ``LIBETH_XDP_TX`` on success,
 *         ``LIBETH_XDP_DROP`` if the frame should be dropped by the stack,
 *         ``LIBETH_XDP_ABORTED`` if the frame will be dropped by libeth_xdp.
 */
static __always_inline u32
libeth_xdp_xmit_queue_bulk(struct libeth_xdp_tx_bulk *bq,
                           struct xdp_frame *xdpf,
                           bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
                                              u32 flags))
{
        u32 head, nr_frags, i, ret = LIBETH_XDP_TX;
        struct device *dev = bq->dev->dev.parent;
        const struct skb_shared_info *sinfo;

        if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
            unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
                return LIBETH_XDP_DROP;

        head = libeth_xdp_xmit_queue_head(bq, xdpf, dev);
        if (head == LIBETH_XDP_PASS)
                goto out;
        else if (head == LIBETH_XDP_DROP)
                return LIBETH_XDP_DROP;

        sinfo = xdp_get_shared_info_from_frame(xdpf);
        nr_frags = sinfo->nr_frags;

        for (i = 0; i < nr_frags; i++) {
                if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
                    unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
                        break;

                if (!libeth_xdp_xmit_queue_frag(bq, &sinfo->frags[i], dev))
                        break;
        }

        if (unlikely(i < nr_frags))
                ret = LIBETH_XDP_ABORTED;

out:
        bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST;

        return ret;
}

/**
 * libeth_xdp_xmit_fill_buf - internal helper to fill one XDP xmit &libeth_sqe
 * @frm: XDP Tx frame from the bulk
 * @i: index on the HW queue
 * @sq: XDPSQ abstraction for the queue
 * @priv: private data
 *
 * Return: XDP Tx descriptor with the mapped DMA and other info to pass to
 * the driver callback.
 */
static inline struct libeth_xdp_tx_desc
libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
                         const struct libeth_xdpsq *sq, u64 priv)
{
        struct libeth_xdp_tx_desc desc;
        struct libeth_sqe *sqe;
        struct xdp_frame *xdpf;

        if (frm.flags & LIBETH_XDP_TX_FIRST) {
                xdpf = frm.xdpf;
                desc.addr = *libeth_xdp_xmit_frame_dma(xdpf);
        } else {
                xdpf = NULL;
                desc.addr = frm.dma;
        }
        desc.opts = frm.opts;

        sqe = &sq->sqes[i];
        dma_unmap_addr_set(sqe, dma, desc.addr);
        dma_unmap_len_set(sqe, len, desc.len);

        if (!xdpf) {
                sqe->type = LIBETH_SQE_XDP_XMIT_FRAG;
                return desc;
        }

        sqe->type = LIBETH_SQE_XDP_XMIT;
        sqe->xdpf = xdpf;
        libeth_xdp_tx_fill_stats(sqe, &desc,
                                 xdp_get_shared_info_from_frame(xdpf));

        return desc;
}

/**
 * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk
 * @bq: bulk to flush
 * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk()
 * @prep: driver callback to prepare the queue
 * @xmit: driver callback to fill a HW descriptor
 */
#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit)                     \
        __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep,     \
                                   libeth_xdp_xmit_fill_buf, xmit)

u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq,
                                u32 count, const struct net_device *dev);

/**
 * __libeth_xdp_xmit_do_bulk - internal function to implement .ndo_xdp_xmit()
 * @bq: XDP Tx bulk to queue frames to
 * @frames: XDP frames passed by the stack
 * @n: number of frames
 * @flags: flags passed by the stack
 * @flush_bulk: driver callback to flush an XDP xmit bulk
 * @finalize: driver callback to finalize sending XDP Tx frames on the queue
 *
 * Perform common checks, map the frags and queue them to the bulk, then flush
 * the bulk to the XDPSQ. If requested by the stack, finalize the queue.
 *
 * Return: number of frames sent or -errno on error.
 */
static __always_inline int
__libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq,
                          struct xdp_frame **frames, u32 n, u32 flags,
                          bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
                                             u32 flags),
                          void (*finalize)(void *xdpsq, bool sent, bool flush))
{
        u32 nxmit = 0;

        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
                return -EINVAL;

        for (u32 i = 0; likely(i < n); i++) {
                u32 ret;

                ret = libeth_xdp_xmit_queue_bulk(bq, frames[i], flush_bulk);
                if (unlikely(ret != LIBETH_XDP_TX)) {
                        nxmit += ret == LIBETH_XDP_ABORTED;
                        break;
                }

                nxmit++;
        }

        if (bq->count) {
                flush_bulk(bq, LIBETH_XDP_TX_NDO);
                if (unlikely(bq->count))
                        nxmit -= libeth_xdp_xmit_return_bulk(bq->bulk,
                                                             bq->count,
                                                             bq->dev);
        }

        finalize(bq->xdpsq, nxmit, flags & XDP_XMIT_FLUSH);

        return nxmit;
}
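
/*
 * A minimal .ndo_xdp_xmit() sketch on top of the helper above. All xx_*
 * names are hypothetical: xx_xmit_flush_bulk would be defined via
 * libeth_xdp_xmit_flush_bulk(), and a real implementation would also pick
 * the XDPSQ per CPU and apply whatever locking its HW needs:
 *
 *	static int xx_xdp_xmit(struct net_device *dev, int n,
 *			       struct xdp_frame **frames, u32 flags)
 *	{
 *		struct xx_priv *priv = netdev_priv(dev);
 *		LIBETH_XDP_ONSTACK_BULK(bq);
 *
 *		bq.prog = NULL;
 *		bq.dev = dev;
 *		bq.xdpsq = xx_pick_xdpsq(priv);
 *		bq.count = 0;
 *
 *		return __libeth_xdp_xmit_do_bulk(&bq, frames, n, flags,
 *						 xx_xmit_flush_bulk,
 *						 xx_finalize_xdpsq);
 *	}
 */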

/* Rx polling path */

static inline void libeth_xdp_return_va(const void *data, bool napi)
{
        netmem_ref netmem = virt_to_netmem(data);

        page_pool_put_full_netmem(__netmem_get_pp(netmem), netmem, napi);
}

static inline void libeth_xdp_return_frags(const struct skb_shared_info *sinfo,
                                           bool napi)
{
        for (u32 i = 0; i < sinfo->nr_frags; i++) {
                netmem_ref netmem = skb_frag_netmem(&sinfo->frags[i]);

                page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi);
        }
}

/**
 * libeth_xdp_return_buff - free/recycle &libeth_xdp_buff
 * @xdp: buffer to free
 *
 * Hotpath helper to free &libeth_xdp_buff. Compared to xdp_return_buff(),
 * it's faster as it gets inlined and always assumes order-0 pages and safe
 * direct recycling. Zeroes @xdp->data to avoid UAFs.
 */
#define libeth_xdp_return_buff(xdp)     __libeth_xdp_return_buff(xdp, true)

static inline void __libeth_xdp_return_buff(struct libeth_xdp_buff *xdp,
                                            bool napi)
{
        if (!xdp_buff_has_frags(&xdp->base))
                goto out;

        libeth_xdp_return_frags(xdp_get_shared_info_from_buff(&xdp->base),
                                napi);

out:
        libeth_xdp_return_va(xdp->data, napi);
        xdp->data = NULL;
}

/* Tx buffer completion */

void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo,
                                 struct xdp_frame_bulk *bq, bool frags);

/**
 * __libeth_xdp_complete_tx - complete sent XDPSQE
 * @sqe: SQ element / Tx buffer to complete
 * @cp: Tx polling/completion params
 * @bulk: internal callback to bulk-free ``XDP_TX`` buffers
 *
 * Use the non-underscored version in drivers instead. This one is shared
 * internally with libeth_tx_complete_any().
 * Complete an XDPSQE of any type of XDP frame. This includes DMA unmapping
 * when needed, buffer freeing, stats update, and SQE invalidation.
 */
static __always_inline void
__libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp,
                         typeof(libeth_xdp_return_buff_bulk) bulk)
{
        enum libeth_sqe_type type = sqe->type;

        switch (type) {
        case LIBETH_SQE_EMPTY:
                return;
        case LIBETH_SQE_XDP_XMIT:
        case LIBETH_SQE_XDP_XMIT_FRAG:
                dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma),
                               dma_unmap_len(sqe, len), DMA_TO_DEVICE);
                break;
        default:
                break;
        }

        switch (type) {
        case LIBETH_SQE_XDP_TX:
                bulk(sqe->sinfo, cp->bq, sqe->nr_frags != 1);
                break;
        case LIBETH_SQE_XDP_XMIT:
                xdp_return_frame_bulk(sqe->xdpf, cp->bq);
                break;
        default:
                break;
        }

        switch (type) {
        case LIBETH_SQE_XDP_TX:
        case LIBETH_SQE_XDP_XMIT:
                cp->xdp_tx -= sqe->nr_frags;

                cp->xss->packets++;
                cp->xss->bytes += sqe->bytes;
                break;
        default:
                break;
        }

        sqe->type = LIBETH_SQE_EMPTY;
}

static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe,
                                          struct libeth_cq_pp *cp)
{
        __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk);
}
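
/*
 * A minimal completion-loop sketch, assuming a hypothetical driver queue
 * struct with @sqes, @next_to_clean, @dev and @xdp_stats fields; only the
 * &libeth_cq_pp fields needed for the frees above are shown, the rest
 * (e.g. @xdp_tx accounting) is left out for brevity:
 *
 *	struct xdp_frame_bulk bq;
 *	struct libeth_cq_pp cp = {
 *		.dev	= xsq->dev,
 *		.bq	= &bq,
 *		.xss	= &xsq->xdp_stats,
 *	};
 *
 *	xdp_frame_bulk_init(&bq);
 *
 *	while (done--)
 *		libeth_xdp_complete_tx(&xsq->sqes[xsq->next_to_clean++], &cp);
 *
 *	xdp_flush_frame_bulk(&bq);
 */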

#endif /* __LIBETH_XDP_H */