/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2025 Intel Corporation */

#ifndef __LIBETH_XDP_H
#define __LIBETH_XDP_H

#include <linux/bpf_trace.h>
#include <linux/unroll.h>

#include <net/libeth/rx.h>
#include <net/libeth/tx.h>
#include <net/xsk_buff_pool.h>

/*
 * Defined as bits to be able to use them as a mask on Rx.
 * Also used as internal return values on Tx.
 */
enum {
	LIBETH_XDP_PASS = 0U,
	LIBETH_XDP_DROP = BIT(0),
	LIBETH_XDP_ABORTED = BIT(1),
	LIBETH_XDP_TX = BIT(2),
};

/*
 * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets cast to,
 * pick the maximum pointer-compatible alignment.
 */
#define __LIBETH_XDP_BUFF_ALIGN \
	(IS_ALIGNED(sizeof(struct xdp_buff_xsk), 16) ? 16 : \
	 IS_ALIGNED(sizeof(struct xdp_buff_xsk), 8) ? 8 : \
	 sizeof(long))

/**
 * struct libeth_xdp_buff - libeth extension over &xdp_buff
 * @base: main &xdp_buff
 * @data: shortcut for @base.data
 * @desc: RQ descriptor containing metadata for this buffer
 * @priv: driver-private scratchspace
 *
 * The main reason for this is to have a pointer to the descriptor to be able
 * to quickly get frame metadata from xdpmo and driver buff-to-xdp callbacks
 * (as well as bigger alignment).
 * Pointer/layout-compatible with &xdp_buff and &xdp_buff_xsk.
 */
struct libeth_xdp_buff {
	union {
		struct xdp_buff base;
		void *data;
	};

	const void *desc;
	unsigned long priv[]
		__aligned(__LIBETH_XDP_BUFF_ALIGN);
} __aligned(__LIBETH_XDP_BUFF_ALIGN);
static_assert(offsetof(struct libeth_xdp_buff, data) ==
	      offsetof(struct xdp_buff_xsk, xdp.data));
static_assert(offsetof(struct libeth_xdp_buff, desc) ==
	      offsetof(struct xdp_buff_xsk, cb));
static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk),
			 __alignof(struct libeth_xdp_buff)));

/* XDPSQ sharing */

DECLARE_STATIC_KEY_FALSE(libeth_xdpsq_share);

/**
 * libeth_xdpsq_num - calculate optimal number of XDPSQs for this device + sys
 * @rxq: current number of active Rx queues
 * @txq: current number of active Tx queues
 * @max: maximum number of Tx queues
 *
 * Each RQ must have its own XDPSQ for XSk pairs, and each CPU must have its
 * own XDPSQ for lockless sending (``XDP_TX``, .ndo_xdp_xmit()). Cap the
 * maximum of these two with the number of SQs the device can have (minus the
 * ones already in use).
 *
 * Return: number of XDP Tx queues the device needs to use.
 */
static inline u32 libeth_xdpsq_num(u32 rxq, u32 txq, u32 max)
{
	return min(max(nr_cpu_ids, rxq), max - txq);
}

/**
 * libeth_xdpsq_shared - whether XDPSQs can be shared between several CPUs
 * @num: number of active XDPSQs
 *
 * Return: true if there's no 1:1 XDPSQ/CPU association, false otherwise.
 */
static inline bool libeth_xdpsq_shared(u32 num)
{
	return num < nr_cpu_ids;
}

/**
 * libeth_xdpsq_id - get XDPSQ index corresponding to this CPU
 * @num: number of active XDPSQs
 *
 * Helper for libeth_xdp routines, do not use in drivers directly.
 *
 * Return: XDPSQ index to be used on this CPU.
 */
static inline u32 libeth_xdpsq_id(u32 num)
{
	u32 ret = raw_smp_processor_id();

	if (static_branch_unlikely(&libeth_xdpsq_share) &&
	    libeth_xdpsq_shared(num))
		ret %= num;

	return ret;
}
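
/*
 * Example: sizing the XDPSQ array at configuration time. This is an
 * illustrative sketch only; the priv->* fields are hypothetical and not part
 * of the libeth_xdp API. The driver asks how many XDPSQs to create given its
 * active Rx/Tx queue counts and the HW queue limit, then records whether
 * they will have to be shared between CPUs:
 *
 *	num = libeth_xdpsq_num(priv->num_rxq, priv->num_txq, priv->max_txq);
 *
 *	priv->num_xdpsq = num;
 *	priv->xdpsq_share = libeth_xdpsq_shared(num);
 *
 * priv->xdpsq_share is later passed to libeth_xdpsq_get() (see below) so
 * that locking is only enabled when the queues are actually shared.
 */
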
void __libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
			const struct net_device *dev);
void __libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
			const struct net_device *dev);

/**
 * libeth_xdpsq_get - initialize &libeth_xdpsq_lock
 * @lock: lock to initialize
 * @dev: netdev which this lock belongs to
 * @share: whether XDPSQs can be shared
 *
 * Tracks the current XDPSQ association and enables the static lock
 * if needed.
 */
static inline void libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
				    const struct net_device *dev,
				    bool share)
{
	if (unlikely(share))
		__libeth_xdpsq_get(lock, dev);
}

/**
 * libeth_xdpsq_put - deinitialize &libeth_xdpsq_lock
 * @lock: lock to deinitialize
 * @dev: netdev which this lock belongs to
 *
 * Tracks the current XDPSQ association and disables the static lock
 * if needed.
 */
static inline void libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
				    const struct net_device *dev)
{
	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
		__libeth_xdpsq_put(lock, dev);
}

void __libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock);
void __libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock);

/**
 * libeth_xdpsq_lock - grab &libeth_xdpsq_lock if needed
 * @lock: lock to take
 *
 * Touches the underlying spinlock only if the static key is enabled
 * and the queue itself is marked as shareable.
 */
static inline void libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock)
{
	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
		__libeth_xdpsq_lock(lock);
}

/**
 * libeth_xdpsq_unlock - free &libeth_xdpsq_lock if needed
 * @lock: lock to free
 *
 * Touches the underlying spinlock only if the static key is enabled
 * and the queue itself is marked as shareable.
 */
static inline void libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock)
{
	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
		__libeth_xdpsq_unlock(lock);
}
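
/*
 * Example: a typical lock lifecycle (illustrative sketch only; the xdpsq->*
 * and priv->* fields are hypothetical). The lock is initialized when an
 * XDPSQ is brought up and released on tear-down; the hot path then only
 * pays for the spinlock when the queue is actually shared:
 *
 *	libeth_xdpsq_get(&xdpsq->xdp_lock, priv->netdev,
 *			 priv->xdpsq_share);
 *
 *	libeth_xdpsq_lock(&xdpsq->xdp_lock);
 *	... fill and kick the HW queue ...
 *	libeth_xdpsq_unlock(&xdpsq->xdp_lock);
 *
 *	libeth_xdpsq_put(&xdpsq->xdp_lock, priv->netdev);
 */
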
/* XDPSQ clean-up timers */

void libeth_xdpsq_init_timer(struct libeth_xdpsq_timer *timer, void *xdpsq,
			     struct libeth_xdpsq_lock *lock,
			     void (*poll)(struct work_struct *work));

/**
 * libeth_xdpsq_deinit_timer - deinitialize &libeth_xdpsq_timer
 * @timer: timer to deinitialize
 *
 * Flush and cancel the underlying delayed work.
 */
static inline void libeth_xdpsq_deinit_timer(struct libeth_xdpsq_timer *timer)
{
	cancel_delayed_work_sync(&timer->dwork);
}

/**
 * libeth_xdpsq_queue_timer - run &libeth_xdpsq_timer
 * @timer: timer to queue
 *
 * Should be called after the queue was filled and the transmission was run,
 * so that the pending buffers get completed even if no further sending
 * happens within a second (in which case lazy cleaning never triggers).
 * If the timer was already queued, it is rearmed with a one-second timeout
 * again.
 */
static inline void libeth_xdpsq_queue_timer(struct libeth_xdpsq_timer *timer)
{
	mod_delayed_work_on(raw_smp_processor_id(), system_bh_highpri_wq,
			    &timer->dwork, HZ);
}

/**
 * libeth_xdpsq_run_timer - wrapper to run a queue clean-up on a timer event
 * @work: workqueue belonging to the corresponding timer
 * @poll: driver-specific completion queue poll function
 *
 * Run the polling function on the locked queue and requeue the timer if
 * there's more work to do.
 * Designed to be used via LIBETH_XDP_DEFINE_TIMER() below.
 */
static __always_inline void
libeth_xdpsq_run_timer(struct work_struct *work,
		       u32 (*poll)(void *xdpsq, u32 budget))
{
	struct libeth_xdpsq_timer *timer = container_of(work, typeof(*timer),
							dwork.work);

	libeth_xdpsq_lock(timer->lock);

	if (poll(timer->xdpsq, U32_MAX))
		libeth_xdpsq_queue_timer(timer);

	libeth_xdpsq_unlock(timer->lock);
}
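
/*
 * Example: wiring up the clean-up timer by hand (illustrative sketch only;
 * the mydrv_* names are hypothetical). The driver wraps its completion poll
 * routine, u32 mydrv_xdpsq_poll(void *xdpsq, u32 budget), with a work
 * callback built on libeth_xdpsq_run_timer():
 *
 *	static void mydrv_xdpsq_poll_timer(struct work_struct *work)
 *	{
 *		libeth_xdpsq_run_timer(work, mydrv_xdpsq_poll);
 *	}
 *
 * On queue bring-up:
 *
 *	libeth_xdpsq_init_timer(xdpsq->timer, xdpsq, &xdpsq->xdp_lock,
 *				mydrv_xdpsq_poll_timer);
 *
 * libeth_xdpsq_queue_timer() is then called after each transmission burst
 * and libeth_xdpsq_deinit_timer() on tear-down.
 */
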
/* Common Tx bits */

/**
 * enum - libeth_xdp internal Tx flags
 * @LIBETH_XDP_TX_BULK: the bulk size at which it will be flushed to the queue
 * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled
 * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent
 * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit()
 */
enum {
	LIBETH_XDP_TX_BULK = DEV_MAP_BULK_SIZE,
	LIBETH_XDP_TX_BATCH = 8,

	LIBETH_XDP_TX_DROP = BIT(0),
	LIBETH_XDP_TX_NDO = BIT(1),
};

/**
 * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags
 * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length
 * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame
 * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame
 * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags
 * @LIBETH_XDP_TX_FLAGS: only for ``XDP_TX``, [31:16] of ::len_fl is flags
 */
enum {
	LIBETH_XDP_TX_LEN = GENMASK(15, 0),

	LIBETH_XDP_TX_FIRST = BIT(16),
	LIBETH_XDP_TX_LAST = BIT(17),
	LIBETH_XDP_TX_MULTI = BIT(18),

	LIBETH_XDP_TX_FLAGS = GENMASK(31, 16),
};

/**
 * struct libeth_xdp_tx_frame - represents one XDP Tx element
 * @data: frame start pointer for ``XDP_TX``
 * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed
 * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info
 * @frag: one (non-head) frag for ``XDP_TX``
 * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit()
 * @dma: DMA address of the non-head frag for .ndo_xdp_xmit()
 * @len: frag length for .ndo_xdp_xmit()
 * @flags: Tx flags for the above
 * @opts: combined @len + @flags for the above for speed
 */
struct libeth_xdp_tx_frame {
	union {
		/* ``XDP_TX`` */
		struct {
			void *data;
			u32 len_fl;
			u32 soff;
		};

		/* ``XDP_TX`` frag */
		skb_frag_t frag;

		/* .ndo_xdp_xmit() */
		struct {
			union {
				struct xdp_frame *xdpf;
				dma_addr_t dma;
			};
			union {
				struct {
					u32 len;
					u32 flags;
				};
				aligned_u64 opts;
			};
		};
	};
} __aligned_largest;
static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) ==
	      offsetof(struct libeth_xdp_tx_frame, len_fl));

/**
 * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending
 * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit()
 * @dev: &net_device which the frames are transmitted on
 * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure
 * @count: current number of frames in @bulk
 * @bulk: array of queued frames for bulk Tx
 *
 * All XDP Tx operations queue each frame to the bulk first and flush it
 * when @count reaches the array end. The bulk is always placed on the stack
 * for performance. One bulk element contains all the data necessary
 * for sending a frame and then freeing it on completion.
 */
struct libeth_xdp_tx_bulk {
	const struct bpf_prog *prog;
	struct net_device *dev;
	void *xdpsq;

	u32 count;
	struct libeth_xdp_tx_frame bulk[LIBETH_XDP_TX_BULK];
} __aligned(sizeof(struct libeth_xdp_tx_frame));

/**
 * LIBETH_XDP_ONSTACK_BULK - declare &libeth_xdp_tx_bulk on the stack
 * @bq: name of the variable to declare
 *
 * Helper to declare a bulk on the stack with a compiler hint that it should
 * not be initialized automatically (with `CONFIG_INIT_STACK_ALL_*`) for
 * performance reasons.
 */
#define LIBETH_XDP_ONSTACK_BULK(bq) \
	struct libeth_xdp_tx_bulk bq __uninitialized

/**
 * struct libeth_xdpsq - abstraction for an XDPSQ
 * @sqes: array of Tx buffers from the actual queue struct
 * @descs: opaque pointer to the HW descriptor array
 * @ntu: pointer to the next free descriptor index
 * @count: number of descriptors on that queue
 * @pending: pointer to the number of sent-not-completed descs on that queue
 * @xdp_tx: pointer to the above
 * @lock: corresponding XDPSQ lock
 *
 * Abstraction for driver-independent implementation of Tx. Placed on the stack
 * and filled by the driver before the transmission, so that the generic
 * functions can access and modify driver-specific resources.
 */
struct libeth_xdpsq {
	struct libeth_sqe *sqes;
	void *descs;

	u32 *ntu;
	u32 count;

	u32 *pending;
	u32 *xdp_tx;
	struct libeth_xdpsq_lock *lock;
};

/**
 * struct libeth_xdp_tx_desc - abstraction for an XDP Tx descriptor
 * @addr: DMA address of the frame
 * @len: length of the frame
 * @flags: XDP Tx flags
 * @opts: combined @len + @flags for speed
 *
 * Filled by the generic functions and then passed to driver-specific functions
 * to fill a HW Tx descriptor, always placed on the [function] stack.
 */
struct libeth_xdp_tx_desc {
	dma_addr_t addr;
	union {
		struct {
			u32 len;
			u32 flags;
		};
		aligned_u64 opts;
	};
} __aligned_largest;

/**
 * libeth_xdp_tx_xmit_bulk - main XDP Tx function
 * @bulk: array of frames to send
 * @xdpsq: pointer to the driver-specific XDPSQ struct
 * @n: number of frames to send
 * @unroll: whether to unroll the queue filling loop for speed
 * @priv: driver-specific private data
 * @prep: callback for cleaning the queue and filling abstract &libeth_xdpsq
 * @fill: internal callback for filling &libeth_sqe and &libeth_xdp_tx_desc
 * @xmit: callback for filling a HW descriptor with the frame info
 *
 * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for
 * all types of frames.
 * @prep must lock the queue as this function releases it at the end. @unroll
 * greatly increases the object code size, but also greatly increases
 * performance.
 * The compilers inline all those onstack abstractions to direct data accesses.
 *
 * Return: number of frames actually placed on the queue, <= @n. The function
 * can't fail, but can send fewer frames than requested if there are not
 * enough free descriptors available. The actual free space is returned by
 * @prep from the driver.
 */
static __always_inline u32
libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq,
			u32 n, bool unroll, u64 priv,
			u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
			struct libeth_xdp_tx_desc
			(*fill)(struct libeth_xdp_tx_frame frm, u32 i,
				const struct libeth_xdpsq *sq, u64 priv),
			void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
				     const struct libeth_xdpsq *sq, u64 priv))
{
	struct libeth_xdpsq sq __uninitialized;
	u32 this, batched, off = 0;
	u32 ntu, i = 0;

	n = min(n, prep(xdpsq, &sq));
	if (unlikely(!n))
		goto unlock;

	ntu = *sq.ntu;

	this = sq.count - ntu;
	if (likely(this > n))
		this = n;

again:
	if (!unroll)
		goto linear;

	batched = ALIGN_DOWN(this, LIBETH_XDP_TX_BATCH);

	for ( ; i < off + batched; i += LIBETH_XDP_TX_BATCH) {
		u32 base = ntu + i - off;

		unrolled_count(LIBETH_XDP_TX_BATCH)
		for (u32 j = 0; j < LIBETH_XDP_TX_BATCH; j++)
			xmit(fill(bulk[i + j], base + j, &sq, priv),
			     base + j, &sq, priv);
	}

	if (batched < this) {
linear:
		for ( ; i < off + this; i++)
			xmit(fill(bulk[i], ntu + i - off, &sq, priv),
			     ntu + i - off, &sq, priv);
	}

	ntu += this;
	if (likely(ntu < sq.count))
		goto out;

	ntu = 0;

	if (i < n) {
		this = n - i;
		off = i;

		goto again;
	}

out:
	*sq.ntu = ntu;
	*sq.pending += n;
	if (sq.xdp_tx)
		*sq.xdp_tx += n;

unlock:
	libeth_xdpsq_unlock(sq.lock);

	return n;
}
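
/*
 * Example: driver-side callbacks consumed by libeth_xdp_tx_xmit_bulk() and
 * the flush helpers below (illustrative sketch only; the mydrv_* structures,
 * fields and descriptor layout are hypothetical). @prep locks the queue and
 * exports its state via &libeth_xdpsq, @xmit converts one
 * &libeth_xdp_tx_desc into a HW descriptor:
 *
 *	static u32 mydrv_xdp_tx_prep(void *xdpsq, struct libeth_xdpsq *sq)
 *	{
 *		struct mydrv_xdpsq *xs = xdpsq;
 *
 *		libeth_xdpsq_lock(&xs->xdp_lock);
 *
 *		*sq = (struct libeth_xdpsq){
 *			.sqes		= xs->tx_buf,
 *			.descs		= xs->desc_ring,
 *			.ntu		= &xs->next_to_use,
 *			.count		= xs->desc_count,
 *			.pending	= &xs->pending,
 *			.xdp_tx		= &xs->xdp_tx,
 *			.lock		= &xs->xdp_lock,
 *		};
 *
 *		return xs->free_count;
 *	}
 *
 *	static void mydrv_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i,
 *					const struct libeth_xdpsq *sq,
 *					u64 priv)
 *	{
 *		struct mydrv_tx_desc *hw = sq->descs;
 *
 *		hw[i].addr = cpu_to_le64(desc.addr);
 *		hw[i].len_cmd = cpu_to_le32(desc.len);
 *	}
 *
 * A real @prep would also complete already-sent descriptors before reporting
 * the free space, and how @desc.flags maps onto HW command bits is entirely
 * up to the driver.
 */
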
/* ``XDP_TX`` bulking */

void libeth_xdp_return_buff_slow(struct libeth_xdp_buff *xdp);

/**
 * libeth_xdp_tx_queue_head - internal helper for queueing one ``XDP_TX`` head
 * @bq: XDP Tx bulk to queue the head frag to
 * @xdp: XDP buffer with the head to queue
 *
 * Return: false if it's the only frag of the frame, true if it's an S/G frame.
 */
static inline bool libeth_xdp_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
					    const struct libeth_xdp_buff *xdp)
{
	const struct xdp_buff *base = &xdp->base;

	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
		.data = xdp->data,
		.len_fl = (base->data_end - xdp->data) | LIBETH_XDP_TX_FIRST,
		.soff = xdp_data_hard_end(base) - xdp->data,
	};

	if (!xdp_buff_has_frags(base))
		return false;

	bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_MULTI;

	return true;
}

/**
 * libeth_xdp_tx_queue_frag - internal helper for queueing one ``XDP_TX`` frag
 * @bq: XDP Tx bulk to queue the frag to
 * @frag: frag to queue
 */
static inline void libeth_xdp_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
					    const skb_frag_t *frag)
{
	bq->bulk[bq->count++].frag = *frag;
}

/**
 * libeth_xdp_tx_queue_bulk - internal helper for queueing one ``XDP_TX`` frame
 * @bq: XDP Tx bulk to queue the frame to
 * @xdp: XDP buffer to queue
 * @flush_bulk: driver callback to flush the bulk to the HW queue
 *
 * Return: true on success, false on flush error.
 */
static __always_inline bool
libeth_xdp_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq,
			 struct libeth_xdp_buff *xdp,
			 bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
					    u32 flags))
{
	const struct skb_shared_info *sinfo;
	bool ret = true;
	u32 nr_frags;

	if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
	    unlikely(!flush_bulk(bq, 0))) {
		libeth_xdp_return_buff_slow(xdp);
		return false;
	}

	if (!libeth_xdp_tx_queue_head(bq, xdp))
		goto out;

	sinfo = xdp_get_shared_info_from_buff(&xdp->base);
	nr_frags = sinfo->nr_frags;

	for (u32 i = 0; i < nr_frags; i++) {
		if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
		    unlikely(!flush_bulk(bq, 0))) {
			ret = false;
			break;
		}

		libeth_xdp_tx_queue_frag(bq, &sinfo->frags[i]);
	}

out:
	bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_LAST;
	xdp->data = NULL;

	return ret;
}
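
/*
 * Example: queueing an ``XDP_TX`` verdict (illustrative sketch of the flow
 * only; the bulk is assumed to have been set up earlier in the NAPI poll and
 * mydrv_xdp_flush_bulk is a hypothetical driver flush function, e.g. one
 * defined via libeth_xdp_tx_flush_bulk() below). On flush error the buffer
 * has already been freed, so the caller only accounts the drop:
 *
 *	case XDP_TX:
 *		if (!libeth_xdp_tx_queue_bulk(bq, xdp, mydrv_xdp_flush_bulk))
 *			mydrv_update_tx_drop_stats(rq);
 *		break;
 */
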
/**
 * libeth_xdp_tx_fill_stats - fill &libeth_sqe with ``XDP_TX`` frame stats
 * @sqe: SQ element to fill
 * @desc: libeth_xdp Tx descriptor
 * @sinfo: &skb_shared_info for this frame
 *
 * Internal helper for filling an SQE with the frame stats, do not use in
 * drivers. Fills the number of frags and bytes for this frame.
 */
#define libeth_xdp_tx_fill_stats(sqe, desc, sinfo) \
	__libeth_xdp_tx_fill_stats(sqe, desc, sinfo, __UNIQUE_ID(sqe_), \
				   __UNIQUE_ID(desc_), __UNIQUE_ID(sinfo_))

#define __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, ue, ud, us) do { \
	const struct libeth_xdp_tx_desc *ud = (desc); \
	const struct skb_shared_info *us; \
	struct libeth_sqe *ue = (sqe); \
 \
	ue->nr_frags = 1; \
	ue->bytes = ud->len; \
 \
	if (ud->flags & LIBETH_XDP_TX_MULTI) { \
		us = (sinfo); \
		ue->nr_frags += us->nr_frags; \
		ue->bytes += us->xdp_frags_size; \
	} \
} while (0)

/**
 * libeth_xdp_tx_fill_buf - internal helper to fill one ``XDP_TX`` &libeth_sqe
 * @frm: XDP Tx frame from the bulk
 * @i: index on the HW queue
 * @sq: XDPSQ abstraction for the queue
 * @priv: private data
 *
 * Return: XDP Tx descriptor with the synced DMA and other info to pass to
 * the driver callback.
 */
static inline struct libeth_xdp_tx_desc
libeth_xdp_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
		       const struct libeth_xdpsq *sq, u64 priv)
{
	struct libeth_xdp_tx_desc desc;
	struct skb_shared_info *sinfo;
	skb_frag_t *frag = &frm.frag;
	struct libeth_sqe *sqe;
	netmem_ref netmem;

	if (frm.len_fl & LIBETH_XDP_TX_FIRST) {
		sinfo = frm.data + frm.soff;
		skb_frag_fill_netmem_desc(frag, virt_to_netmem(frm.data),
					  offset_in_page(frm.data),
					  frm.len_fl);
	} else {
		sinfo = NULL;
	}

	netmem = skb_frag_netmem(frag);
	desc = (typeof(desc)){
		.addr = page_pool_get_dma_addr_netmem(netmem) +
			skb_frag_off(frag),
		.len = skb_frag_size(frag) & LIBETH_XDP_TX_LEN,
		.flags = skb_frag_size(frag) & LIBETH_XDP_TX_FLAGS,
	};

	dma_sync_single_for_device(__netmem_get_pp(netmem)->p.dev, desc.addr,
				   desc.len, DMA_BIDIRECTIONAL);

	if (!sinfo)
		return desc;

	sqe = &sq->sqes[i];
	sqe->type = LIBETH_SQE_XDP_TX;
	sqe->sinfo = sinfo;
	libeth_xdp_tx_fill_stats(sqe, &desc, sinfo);

	return desc;
}

void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent,
			     u32 flags);

/**
 * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk
 * @bq: bulk to flush
 * @flags: XDP TX flags (.ndo_xdp_xmit() etc.)
 * @prep: driver-specific callback to prepare the queue for sending
 * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc
 * @xmit: driver callback to fill a HW descriptor
 *
 * Internal abstraction to create bulk flush functions for drivers.
 *
 * Return: true if anything was sent, false otherwise.
 */
static __always_inline bool
__libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags,
			   u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
			   struct libeth_xdp_tx_desc
			   (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
				   const struct libeth_xdpsq *sq, u64 priv),
			   void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
					const struct libeth_xdpsq *sq,
					u64 priv))
{
	u32 sent, drops;
	int err = 0;

	sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq,
				       min(bq->count, LIBETH_XDP_TX_BULK),
				       false, 0, prep, fill, xmit);
	drops = bq->count - sent;

	if (unlikely(drops)) {
		libeth_xdp_tx_exception(bq, sent, flags);
		err = -ENXIO;
	} else {
		bq->count = 0;
	}

	trace_xdp_bulk_tx(bq->dev, sent, drops, err);

	return likely(sent);
}

/**
 * libeth_xdp_tx_flush_bulk - wrapper to define flush of one ``XDP_TX`` bulk
 * @bq: bulk to flush
 * @flags: Tx flags, see above
 * @prep: driver callback to prepare the queue
 * @xmit: driver callback to fill a HW descriptor
 */
#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit) \
	__libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \
				   xmit)
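
/*
 * Example: defining the driver's ``XDP_TX`` bulk flush function on top of
 * the wrapper above (illustrative sketch only; mydrv_xdp_tx_prep() and
 * mydrv_xdp_xmit_desc() are hypothetical driver callbacks following the
 * prototypes described in libeth_xdp_tx_xmit_bulk()):
 *
 *	static bool mydrv_xdp_flush_bulk(struct libeth_xdp_tx_bulk *bq,
 *					 u32 flags)
 *	{
 *		return libeth_xdp_tx_flush_bulk(bq, flags, mydrv_xdp_tx_prep,
 *						mydrv_xdp_xmit_desc);
 *	}
 */
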
/* .ndo_xdp_xmit() implementation */

/**
 * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame
 * @xf: pointer to the XDP frame
 *
 * There's no place in &libeth_xdp_tx_frame to store the DMA address of an
 * &xdp_frame head, so the frame headroom is used instead: the address is
 * placed right after the frame struct, naturally aligned.
 *
 * Return: pointer to the DMA address to use.
 */
#define libeth_xdp_xmit_frame_dma(xf) \
	_Generic((xf), \
		 const struct xdp_frame *: \
			(const dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf), \
		 struct xdp_frame *: \
			(dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf) \
	)

static inline void *__libeth_xdp_xmit_frame_dma(const struct xdp_frame *xdpf)
{
	void *addr = (void *)(xdpf + 1);

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
	    __alignof(*xdpf) < sizeof(dma_addr_t))
		addr = PTR_ALIGN(addr, sizeof(dma_addr_t));

	return addr;
}

/**
 * libeth_xdp_xmit_queue_head - internal helper for queueing one XDP xmit head
 * @bq: XDP Tx bulk to queue the head frag to
 * @xdpf: XDP frame with the head to queue
 * @dev: device to perform DMA mapping
 *
 * Return: ``LIBETH_XDP_DROP`` on DMA mapping error,
 * ``LIBETH_XDP_PASS`` if it's the only frag in the frame,
 * ``LIBETH_XDP_TX`` if it's an S/G frame.
 */
static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq,
					     struct xdp_frame *xdpf,
					     struct device *dev)
{
	dma_addr_t dma;

	dma = dma_map_single(dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return LIBETH_XDP_DROP;

	*libeth_xdp_xmit_frame_dma(xdpf) = dma;

	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
		.xdpf = xdpf,
		.len = xdpf->len,
		.flags = LIBETH_XDP_TX_FIRST,
	};

	if (!xdp_frame_has_frags(xdpf))
		return LIBETH_XDP_PASS;

	bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI;

	return LIBETH_XDP_TX;
}

/**
 * libeth_xdp_xmit_queue_frag - internal helper for queueing one XDP xmit frag
 * @bq: XDP Tx bulk to queue the frag to
 * @frag: frag to queue
 * @dev: device to perform DMA mapping
 *
 * Return: true on success, false on DMA mapping error.
 */
static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq,
					      const skb_frag_t *frag,
					      struct device *dev)
{
	dma_addr_t dma;

	dma = skb_frag_dma_map(dev, frag);
	if (dma_mapping_error(dev, dma))
		return false;

	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
		.dma = dma,
		.len = skb_frag_size(frag),
	};

	return true;
}

/**
 * libeth_xdp_xmit_queue_bulk - internal helper for queueing one XDP xmit frame
 * @bq: XDP Tx bulk to queue the frame to
 * @xdpf: XDP frame to queue
 * @flush_bulk: driver callback to flush the bulk to the HW queue
 *
 * Return: ``LIBETH_XDP_TX`` on success,
 * ``LIBETH_XDP_DROP`` if the frame should be dropped by the stack,
 * ``LIBETH_XDP_ABORTED`` if the frame will be dropped by libeth_xdp.
 */
static __always_inline u32
libeth_xdp_xmit_queue_bulk(struct libeth_xdp_tx_bulk *bq,
			   struct xdp_frame *xdpf,
			   bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
					      u32 flags))
{
	u32 head, nr_frags, i, ret = LIBETH_XDP_TX;
	struct device *dev = bq->dev->dev.parent;
	const struct skb_shared_info *sinfo;

	if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
	    unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
		return LIBETH_XDP_DROP;

	head = libeth_xdp_xmit_queue_head(bq, xdpf, dev);
	if (head == LIBETH_XDP_PASS)
		goto out;
	else if (head == LIBETH_XDP_DROP)
		return LIBETH_XDP_DROP;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	nr_frags = sinfo->nr_frags;

	for (i = 0; i < nr_frags; i++) {
		if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
		    unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
			break;

		if (!libeth_xdp_xmit_queue_frag(bq, &sinfo->frags[i], dev))
			break;
	}

	if (unlikely(i < nr_frags))
		ret = LIBETH_XDP_ABORTED;

out:
	bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST;

	return ret;
}

/**
 * libeth_xdp_xmit_fill_buf - internal helper to fill one XDP xmit &libeth_sqe
 * @frm: XDP Tx frame from the bulk
 * @i: index on the HW queue
 * @sq: XDPSQ abstraction for the queue
 * @priv: private data
 *
 * Return: XDP Tx descriptor with the mapped DMA and other info to pass to
 * the driver callback.
 */
static inline struct libeth_xdp_tx_desc
libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
			 const struct libeth_xdpsq *sq, u64 priv)
{
	struct libeth_xdp_tx_desc desc;
	struct libeth_sqe *sqe;
	struct xdp_frame *xdpf;

	if (frm.flags & LIBETH_XDP_TX_FIRST) {
		xdpf = frm.xdpf;
		desc.addr = *libeth_xdp_xmit_frame_dma(xdpf);
	} else {
		xdpf = NULL;
		desc.addr = frm.dma;
	}
	desc.opts = frm.opts;

	sqe = &sq->sqes[i];
	dma_unmap_addr_set(sqe, dma, desc.addr);
	dma_unmap_len_set(sqe, len, desc.len);

	if (!xdpf) {
		sqe->type = LIBETH_SQE_XDP_XMIT_FRAG;
		return desc;
	}

	sqe->type = LIBETH_SQE_XDP_XMIT;
	sqe->xdpf = xdpf;
	libeth_xdp_tx_fill_stats(sqe, &desc,
				 xdp_get_shared_info_from_frame(xdpf));

	return desc;
}

/**
 * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk
 * @bq: bulk to flush
 * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk()
 * @prep: driver callback to prepare the queue
 * @xmit: driver callback to fill a HW descriptor
 */
#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit) \
	__libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep, \
				   libeth_xdp_xmit_fill_buf, xmit)

u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq,
				u32 count, const struct net_device *dev);

/**
 * __libeth_xdp_xmit_do_bulk - internal function to implement .ndo_xdp_xmit()
 * @bq: XDP Tx bulk to queue frames to
 * @frames: XDP frames passed by the stack
 * @n: number of frames
 * @flags: flags passed by the stack
 * @flush_bulk: driver callback to flush an XDP xmit bulk
 * @finalize: driver callback to finalize sending XDP Tx frames on the queue
 *
 * Perform common checks, map the frags and queue them to the bulk, then flush
 * the bulk to the XDPSQ. If requested by the stack, finalize the queue.
 *
 * Return: number of frames sent or -errno on error.
 */
static __always_inline int
__libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq,
			  struct xdp_frame **frames, u32 n, u32 flags,
			  bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
					     u32 flags),
			  void (*finalize)(void *xdpsq, bool sent, bool flush))
{
	u32 nxmit = 0;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	for (u32 i = 0; likely(i < n); i++) {
		u32 ret;

		ret = libeth_xdp_xmit_queue_bulk(bq, frames[i], flush_bulk);
		if (unlikely(ret != LIBETH_XDP_TX)) {
			nxmit += ret == LIBETH_XDP_ABORTED;
			break;
		}

		nxmit++;
	}

	if (bq->count) {
		flush_bulk(bq, LIBETH_XDP_TX_NDO);
		if (unlikely(bq->count))
			nxmit -= libeth_xdp_xmit_return_bulk(bq->bulk,
							     bq->count,
							     bq->dev);
	}

	finalize(bq->xdpsq, nxmit, flags & XDP_XMIT_FLUSH);

	return nxmit;
}
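
/*
 * Example: a minimal .ndo_xdp_xmit() built on top of the helper above
 * (illustrative sketch only; the mydrv_* names are hypothetical).
 * mydrv_xdp_xmit_flush_bulk is assumed to be defined via
 * libeth_xdp_xmit_flush_bulk(), mydrv_xdpsq_for_cpu() picks the XDPSQ
 * serving the current CPU, and mydrv_xdp_finalize() rings the doorbell
 * when the stack requests a flush:
 *
 *	static int mydrv_xdp_xmit(struct net_device *dev, int n,
 *				  struct xdp_frame **frames, u32 flags)
 *	{
 *		struct mydrv_priv *priv = netdev_priv(dev);
 *		LIBETH_XDP_ONSTACK_BULK(bq);
 *
 *		bq.prog = NULL;
 *		bq.dev = dev;
 *		bq.xdpsq = mydrv_xdpsq_for_cpu(priv);
 *		bq.count = 0;
 *
 *		return __libeth_xdp_xmit_do_bulk(&bq, frames, n, flags,
 *						 mydrv_xdp_xmit_flush_bulk,
 *						 mydrv_xdp_finalize);
 *	}
 */
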
/* Rx polling path */

static inline void libeth_xdp_return_va(const void *data, bool napi)
{
	netmem_ref netmem = virt_to_netmem(data);

	page_pool_put_full_netmem(__netmem_get_pp(netmem), netmem, napi);
}

static inline void libeth_xdp_return_frags(const struct skb_shared_info *sinfo,
					   bool napi)
{
	for (u32 i = 0; i < sinfo->nr_frags; i++) {
		netmem_ref netmem = skb_frag_netmem(&sinfo->frags[i]);

		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi);
	}
}

/**
 * libeth_xdp_return_buff - free/recycle &libeth_xdp_buff
 * @xdp: buffer to free
 *
 * Hotpath helper to free &libeth_xdp_buff. Compared to xdp_return_buff(),
 * it's faster as it gets inlined and always assumes order-0 pages and safe
 * direct recycling. Zeroes @xdp->data to avoid UAFs.
 */
#define libeth_xdp_return_buff(xdp) __libeth_xdp_return_buff(xdp, true)

static inline void __libeth_xdp_return_buff(struct libeth_xdp_buff *xdp,
					    bool napi)
{
	if (!xdp_buff_has_frags(&xdp->base))
		goto out;

	libeth_xdp_return_frags(xdp_get_shared_info_from_buff(&xdp->base),
				napi);

out:
	libeth_xdp_return_va(xdp->data, napi);
	xdp->data = NULL;
}

/* Tx buffer completion */

void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo,
				 struct xdp_frame_bulk *bq, bool frags);

/**
 * __libeth_xdp_complete_tx - complete sent XDPSQE
 * @sqe: SQ element / Tx buffer to complete
 * @cp: Tx polling/completion params
 * @bulk: internal callback to bulk-free ``XDP_TX`` buffers
 *
 * Use the non-underscored version in drivers instead. This one is shared
 * internally with libeth_tx_complete_any().
 * Complete an XDPSQE of any type of XDP frame. This includes DMA unmapping
 * when needed, buffer freeing, stats update, and SQE invalidation.
 */
static __always_inline void
__libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp,
			 typeof(libeth_xdp_return_buff_bulk) bulk)
{
	enum libeth_sqe_type type = sqe->type;

	switch (type) {
	case LIBETH_SQE_EMPTY:
		return;
	case LIBETH_SQE_XDP_XMIT:
	case LIBETH_SQE_XDP_XMIT_FRAG:
		dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma),
			       dma_unmap_len(sqe, len), DMA_TO_DEVICE);
		break;
	default:
		break;
	}

	switch (type) {
	case LIBETH_SQE_XDP_TX:
		bulk(sqe->sinfo, cp->bq, sqe->nr_frags != 1);
		break;
	case LIBETH_SQE_XDP_XMIT:
		xdp_return_frame_bulk(sqe->xdpf, cp->bq);
		break;
	default:
		break;
	}

	switch (type) {
	case LIBETH_SQE_XDP_TX:
	case LIBETH_SQE_XDP_XMIT:
		cp->xdp_tx -= sqe->nr_frags;

		cp->xss->packets++;
		cp->xss->bytes += sqe->bytes;
		break;
	default:
		break;
	}

	sqe->type = LIBETH_SQE_EMPTY;
}

static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe,
					   struct libeth_cq_pp *cp)
{
	__libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk);
}
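
/*
 * Example: completing XDP SQEs from the driver's completion poll routine
 * (illustrative sketch only; the xs->* fields are hypothetical and done is
 * the number of descriptors reported as completed by the HW). &libeth_cq_pp
 * is filled once per cycle and the xdp_frame bulk is flushed at the end:
 *
 *	struct xdp_frame_bulk bq;
 *	struct libeth_cq_pp cp = {
 *		.dev	= xs->dev,
 *		.bq	= &bq,
 *		.xss	= &xs->xdp_stats,
 *	};
 *	u32 ntc = xs->next_to_clean;
 *
 *	xdp_frame_bulk_init(&bq);
 *
 *	while (done--) {
 *		libeth_xdp_complete_tx(&xs->tx_buf[ntc], &cp);
 *		if (unlikely(++ntc == xs->desc_count))
 *			ntc = 0;
 *	}
 *
 *	xdp_flush_frame_bulk(&bq);
 *	xs->next_to_clean = ntc;
 */
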
#endif /* __LIBETH_XDP_H */