xref: /linux/include/net/libeth/xdp.h (revision 819bbaefeded93df36d71d58d9963d706e6e99e1)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (C) 2025 Intel Corporation */
3 
4 #ifndef __LIBETH_XDP_H
5 #define __LIBETH_XDP_H
6 
7 #include <linux/bpf_trace.h>
8 #include <linux/unroll.h>
9 
10 #include <net/libeth/rx.h>
11 #include <net/libeth/tx.h>
12 #include <net/xsk_buff_pool.h>
13 
14 /*
15  * Defined as bits to be able to use them as a mask on Rx.
16  * Also used as internal return values on Tx.
17  */
18 enum {
19 	LIBETH_XDP_PASS			= 0U,
20 	LIBETH_XDP_DROP			= BIT(0),
21 	LIBETH_XDP_ABORTED		= BIT(1),
22 	LIBETH_XDP_TX			= BIT(2),
23 };
24 
25 /*
26  * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets cast to;
27  * pick the maximum pointer-compatible alignment.
28  */
29 #define __LIBETH_XDP_BUFF_ALIGN						      \
30 	(IS_ALIGNED(sizeof(struct xdp_buff_xsk), 16) ? 16 :		      \
31 	 IS_ALIGNED(sizeof(struct xdp_buff_xsk), 8) ? 8 :		      \
32 	 sizeof(long))
33 
34 /**
35  * struct libeth_xdp_buff - libeth extension over &xdp_buff
36  * @base: main &xdp_buff
37  * @data: shortcut for @base.data
38  * @desc: RQ descriptor containing metadata for this buffer
39  * @priv: driver-private scratchspace
40  *
41  * The main reason for this is to have a pointer to the descriptor, so that
42  * frame metadata can be fetched quickly from the xdpmo and driver buff-to-xdp
43  * callbacks (and to get bigger alignment).
44  * Pointer/layout-compatible with &xdp_buff and &xdp_buff_xsk.
45  */
46 struct libeth_xdp_buff {
47 	union {
48 		struct xdp_buff		base;
49 		void			*data;
50 	};
51 
52 	const void			*desc;
53 	unsigned long			priv[]
54 					__aligned(__LIBETH_XDP_BUFF_ALIGN);
55 } __aligned(__LIBETH_XDP_BUFF_ALIGN);
56 static_assert(offsetof(struct libeth_xdp_buff, data) ==
57 	      offsetof(struct xdp_buff_xsk, xdp.data));
58 static_assert(offsetof(struct libeth_xdp_buff, desc) ==
59 	      offsetof(struct xdp_buff_xsk, cb));
60 static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk),
61 			 __alignof(struct libeth_xdp_buff)));
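
/*
 * Example (illustrative sketch; all mydrv_* names are hypothetical): an
 * &xdp_metadata_ops callback can cast the generic context back to
 * &libeth_xdp_buff and reach the Rx descriptor via @desc, which the driver
 * stashed during its buff-to-xdp conversion:
 *
 *	static int mydrv_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
 *				       enum xdp_rss_hash_type *rss_type)
 *	{
 *		const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx;
 *		const struct mydrv_rx_desc *rxd = xdp->desc;
 *
 *		if (!mydrv_rx_desc_has_hash(rxd))
 *			return -ENODATA;
 *
 *		*hash = le32_to_cpu(rxd->rss_hash);
 *		*rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
 *
 *		return 0;
 *	}
 */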
62 
63 /* XDPSQ sharing */
64 
65 DECLARE_STATIC_KEY_FALSE(libeth_xdpsq_share);
66 
67 /**
68  * libeth_xdpsq_num - calculate optimal number of XDPSQs for this device + sys
69  * @rxq: current number of active Rx queues
70  * @txq: current number of active Tx queues
71  * @max: maximum number of Tx queues
72  *
73  * Each RQ must have its own XDPSQ for XSk pairs and each CPU must have its own
74  * XDPSQ for lockless sending (``XDP_TX``, .ndo_xdp_xmit()). Cap the maximum of
75  * these two with the number of SQs the device can have (minus already used ones).
76  *
77  * Return: number of XDP Tx queues the device needs to use.
78  */
79 static inline u32 libeth_xdpsq_num(u32 rxq, u32 txq, u32 max)
80 {
81 	return min(max(nr_cpu_ids, rxq), max - txq);
82 }
83 
84 /**
85  * libeth_xdpsq_shared - whether XDPSQs can be shared between several CPUs
86  * @num: number of active XDPSQs
87  *
88  * Return: true if there's no 1:1 XDPSQ/CPU association, false otherwise.
89  */
90 static inline bool libeth_xdpsq_shared(u32 num)
91 {
92 	return num < nr_cpu_ids;
93 }
94 
95 /**
96  * libeth_xdpsq_id - get XDPSQ index corresponding to this CPU
97  * @num: number of active XDPSQs
98  *
99  * Helper for libeth_xdp routines; do not use it in drivers directly.
100  *
101  * Return: XDPSQ index to be used on this CPU.
102  */
103 static inline u32 libeth_xdpsq_id(u32 num)
104 {
105 	u32 ret = raw_smp_processor_id();
106 
107 	if (static_branch_unlikely(&libeth_xdpsq_share) &&
108 	    libeth_xdpsq_shared(num))
109 		ret %= num;
110 
111 	return ret;
112 }
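
/*
 * Example (sketch; the priv->* fields are hypothetical): at configuration
 * time, a driver might size its XDPSQ array and check whether the queues
 * will end up shared between CPUs, to be passed to libeth_xdpsq_get() below:
 *
 *	u32 num = libeth_xdpsq_num(priv->num_rxq, priv->num_txq,
 *				   priv->max_txq);
 *	bool shared = libeth_xdpsq_shared(num);
 */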
113 
114 void __libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
115 			const struct net_device *dev);
116 void __libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
117 			const struct net_device *dev);
118 
119 /**
120  * libeth_xdpsq_get - initialize &libeth_xdpsq_lock
121  * @lock: lock to initialize
122  * @dev: netdev which this lock belongs to
123  * @share: whether XDPSQs can be shared
124  *
125  * Tracks the current XDPSQ association and enables the static lock
126  * if needed.
127  */
128 static inline void libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
129 				    const struct net_device *dev,
130 				    bool share)
131 {
132 	if (unlikely(share))
133 		__libeth_xdpsq_get(lock, dev);
134 }
135 
136 /**
137  * libeth_xdpsq_put - deinitialize &libeth_xdpsq_lock
138  * @lock: lock to deinitialize
139  * @dev: netdev which this lock belongs to
140  *
141  * Tracks the current XDPSQ association and disables the static lock
142  * if needed.
143  */
144 static inline void libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
145 				    const struct net_device *dev)
146 {
147 	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
148 		__libeth_xdpsq_put(lock, dev);
149 }
150 
151 void __libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock);
152 void __libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock);
153 
154 /**
155  * libeth_xdpsq_lock - grab &libeth_xdpsq_lock if needed
156  * @lock: lock to take
157  *
158  * Touches the underlying spinlock only if the static key is enabled
159  * and the queue itself is marked as shareable.
160  */
161 static inline void libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock)
162 {
163 	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
164 		__libeth_xdpsq_lock(lock);
165 }
166 
167 /**
168  * libeth_xdpsq_unlock - free &libeth_xdpsq_lock if needed
169  * @lock: lock to free
170  *
171  * Touches the underlying spinlock only if the static key is enabled
172  * and the queue itself is marked as shareable.
173  */
174 static inline void libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock)
175 {
176 	if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
177 		__libeth_xdpsq_unlock(lock);
178 }
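
/*
 * Example (sketch; the sq->/priv->/mydrv_* names are hypothetical): typical
 * lifecycle of a &libeth_xdpsq_lock. At queue setup:
 *
 *	libeth_xdpsq_get(&sq->xdpsq_lock, priv->netdev,
 *			 libeth_xdpsq_shared(num));
 *
 * In the hot path, around descriptor filling and the tail bump:
 *
 *	libeth_xdpsq_lock(&sq->xdpsq_lock);
 *	mydrv_xdp_xmit_pending(sq);
 *	libeth_xdpsq_unlock(&sq->xdpsq_lock);
 *
 * At queue teardown:
 *
 *	libeth_xdpsq_put(&sq->xdpsq_lock, priv->netdev);
 */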
179 
180 /* XDPSQ clean-up timers */
181 
182 void libeth_xdpsq_init_timer(struct libeth_xdpsq_timer *timer, void *xdpsq,
183 			     struct libeth_xdpsq_lock *lock,
184 			     void (*poll)(struct work_struct *work));
185 
186 /**
187  * libeth_xdpsq_deinit_timer - deinitialize &libeth_xdpsq_timer
188  * @timer: timer to deinitialize
189  *
190  * Flush and disable the underlying workqueue.
191  */
192 static inline void libeth_xdpsq_deinit_timer(struct libeth_xdpsq_timer *timer)
193 {
194 	cancel_delayed_work_sync(&timer->dwork);
195 }
196 
197 /**
198  * libeth_xdpsq_queue_timer - run &libeth_xdpsq_timer
199  * @timer: timer to queue
200  *
201  * Should be called after the queue has been filled and the transmission has
202  * been kicked off, so that the pending buffers still get completed if no
203  * further sending happens within a second (i.e. lazy cleaning won't be
204  * triggered).
205  * If the timer was already queued, it is rearmed with a one-second timeout.
206  */
207 static inline void libeth_xdpsq_queue_timer(struct libeth_xdpsq_timer *timer)
208 {
209 	mod_delayed_work_on(raw_smp_processor_id(), system_bh_highpri_wq,
210 			    &timer->dwork, HZ);
211 }
212 
213 /**
214  * libeth_xdpsq_run_timer - wrapper to run a queue clean-up on a timer event
215  * @work: workqueue belonging to the corresponding timer
216  * @poll: driver-specific completion queue poll function
217  *
218  * Run the polling function on the locked queue and requeue the timer if
219  * there's more work to do.
220  * Designed to be used via LIBETH_XDP_DEFINE_TIMER() below.
221  */
222 static __always_inline void
223 libeth_xdpsq_run_timer(struct work_struct *work,
224 		       u32 (*poll)(void *xdpsq, u32 budget))
225 {
226 	struct libeth_xdpsq_timer *timer = container_of(work, typeof(*timer),
227 							dwork.work);
228 
229 	libeth_xdpsq_lock(timer->lock);
230 
231 	if (poll(timer->xdpsq, U32_MAX))
232 		libeth_xdpsq_queue_timer(timer);
233 
234 	libeth_xdpsq_unlock(timer->lock);
235 }
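
/*
 * Example (sketch; mydrv_* names are hypothetical): a work handler wrapping
 * libeth_xdpsq_run_timer() with a driver completion poll of the
 * u32 (*)(void *xdpsq, u32 budget) form, plus the matching timer setup:
 *
 *	static void mydrv_xdpsq_poll_timer(struct work_struct *work)
 *	{
 *		libeth_xdpsq_run_timer(work, mydrv_xdpsq_complete);
 *	}
 *
 *	libeth_xdpsq_init_timer(sq->timer, sq, &sq->xdpsq_lock,
 *				mydrv_xdpsq_poll_timer);
 */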
236 
237 /* Common Tx bits */
238 
239 /**
240  * enum - libeth_xdp internal Tx flags
241  * @LIBETH_XDP_TX_BULK: one bulk size; the bulk is flushed to the queue when full
242  * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled
243  * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent
244  * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit()
245  */
246 enum {
247 	LIBETH_XDP_TX_BULK		= DEV_MAP_BULK_SIZE,
248 	LIBETH_XDP_TX_BATCH		= 8,
249 
250 	LIBETH_XDP_TX_DROP		= BIT(0),
251 	LIBETH_XDP_TX_NDO		= BIT(1),
252 };
253 
254 /**
255  * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags
256  * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length
257  * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame
258  * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame
259  * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags
260  * @LIBETH_XDP_TX_FLAGS: only for ``XDP_TX``, [31:16] of ::len_fl is flags
261  */
262 enum {
263 	LIBETH_XDP_TX_LEN		= GENMASK(15, 0),
264 
265 	LIBETH_XDP_TX_FIRST		= BIT(16),
266 	LIBETH_XDP_TX_LAST		= BIT(17),
267 	LIBETH_XDP_TX_MULTI		= BIT(18),
268 
269 	LIBETH_XDP_TX_FLAGS		= GENMASK(31, 16),
270 };
271 
272 /**
273  * struct libeth_xdp_tx_frame - represents one XDP Tx element
274  * @data: frame start pointer for ``XDP_TX``
275  * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed
276  * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info
277  * @frag: one (non-head) frag for ``XDP_TX``
278  * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit()
279  * @dma: DMA address of the non-head frag for .ndo_xdp_xmit()
280  * @len: frag length for .ndo_xdp_xmit()
281  * @flags: Tx flags for the above
282  * @opts: combined @len + @flags for the above for speed
283  */
284 struct libeth_xdp_tx_frame {
285 	union {
286 		/* ``XDP_TX`` */
287 		struct {
288 			void				*data;
289 			u32				len_fl;
290 			u32				soff;
291 		};
292 
293 		/* ``XDP_TX`` frag */
294 		skb_frag_t			frag;
295 
296 		/* .ndo_xdp_xmit() */
297 		struct {
298 			union {
299 				struct xdp_frame		*xdpf;
300 				dma_addr_t			dma;
301 			};
302 			union {
303 				struct {
304 					u32				len;
305 					u32				flags;
306 				};
307 				aligned_u64			opts;
308 			};
309 		};
310 	};
311 } __aligned_largest;
312 static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) ==
313 	      offsetof(struct libeth_xdp_tx_frame, len_fl));
314 
315 /**
316  * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending
317  * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit()
318  * @dev: &net_device which the frames are transmitted on
319  * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure
320  * @count: current number of frames in @bulk
321  * @bulk: array of queued frames for bulk Tx
322  *
323  * All XDP Tx operations queue each frame to the bulk first and flush it
324  * when @count reaches the array end. The bulk is always placed on the stack
325  * for performance. One bulk element contains all the data necessary
326  * for sending a frame and then freeing it on completion.
327  */
328 struct libeth_xdp_tx_bulk {
329 	const struct bpf_prog		*prog;
330 	struct net_device		*dev;
331 	void				*xdpsq;
332 
333 	u32				count;
334 	struct libeth_xdp_tx_frame	bulk[LIBETH_XDP_TX_BULK];
335 } __aligned(sizeof(struct libeth_xdp_tx_frame));
336 
337 /**
338  * LIBETH_XDP_ONSTACK_BULK - declare &libeth_xdp_tx_bulk on the stack
339  * @bq: name of the variable to declare
340  *
341  * Helper to declare a bulk on the stack with a compiler hint that it should
342  * not be initialized automatically (with `CONFIG_INIT_STACK_ALL_*`) for
343  * performance reasons.
344  */
345 #define LIBETH_XDP_ONSTACK_BULK(bq)					      \
346 	struct libeth_xdp_tx_bulk bq __uninitialized
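
/*
 * Example (sketch; the prog/dev/xdpsq sources are hypothetical): since the
 * bulk is declared uninitialized on purpose, the driver must fill its header
 * fields itself before queueing anything to it:
 *
 *	LIBETH_XDP_ONSTACK_BULK(bq);
 *
 *	bq.prog = rcu_dereference(priv->xdp_prog);
 *	bq.dev = priv->netdev;
 *	bq.xdpsq = mydrv_pick_xdpsq(priv);
 *	bq.count = 0;
 */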
347 
348 /**
349  * struct libeth_xdpsq - abstraction for an XDPSQ
350  * @sqes: array of Tx buffers from the actual queue struct
351  * @descs: opaque pointer to the HW descriptor array
352  * @ntu: pointer to the next free descriptor index
353  * @count: number of descriptors on that queue
354  * @pending: pointer to the number of sent-not-completed descs on that queue
355  * @xdp_tx: pointer to the above
356  * @lock: corresponding XDPSQ lock
357  *
358  * Abstraction for driver-independent implementation of Tx. Placed on the stack
359  * and filled by the driver before the transmission, so that the generic
360  * functions can access and modify driver-specific resources.
361  */
362 struct libeth_xdpsq {
363 	struct libeth_sqe		*sqes;
364 	void				*descs;
365 
366 	u32				*ntu;
367 	u32				count;
368 
369 	u32				*pending;
370 	u32				*xdp_tx;
371 	struct libeth_xdpsq_lock	*lock;
372 };
373 
374 /**
375  * struct libeth_xdp_tx_desc - abstraction for an XDP Tx descriptor
376  * @addr: DMA address of the frame
377  * @len: length of the frame
378  * @flags: XDP Tx flags
379  * @opts: combined @len + @flags for speed
380  *
381  * Filled by the generic functions and then passed to driver-specific functions
382  * to fill a HW Tx descriptor, always placed on the [function] stack.
383  */
384 struct libeth_xdp_tx_desc {
385 	dma_addr_t			addr;
386 	union {
387 		struct {
388 			u32				len;
389 			u32				flags;
390 		};
391 		aligned_u64			opts;
392 	};
393 } __aligned_largest;
394 
395 /**
396  * libeth_xdp_tx_xmit_bulk - main XDP Tx function
397  * @bulk: array of frames to send
398  * @xdpsq: pointer to the driver-specific XDPSQ struct
399  * @n: number of frames to send
400  * @unroll: whether to unroll the queue filling loop for speed
401  * @priv: driver-specific private data
402  * @prep: callback for cleaning the queue and filling abstract &libeth_xdpsq
403  * @fill: internal callback for filling &libeth_sqe and &libeth_xdp_tx_desc
404  * @xmit: callback for filling a HW descriptor with the frame info
405  *
406  * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for
407  * all types of frames.
408  * @prep must lock the queue as this function releases it at the end. @unroll
409  * greatly increases the object code size, but also greatly increases
410  * performance.
411  * The compiler inlines all those onstack abstractions into direct data accesses.
412  *
413  * Return: number of frames actually placed on the queue, <= @n. The function
414  * can't fail, but it can send fewer frames if there are not enough free
415  * descriptors available. The actual free space is returned by @prep from the driver.
416  */
417 static __always_inline u32
418 libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq,
419 			u32 n, bool unroll, u64 priv,
420 			u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
421 			struct libeth_xdp_tx_desc
422 			(*fill)(struct libeth_xdp_tx_frame frm, u32 i,
423 				const struct libeth_xdpsq *sq, u64 priv),
424 			void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
425 				     const struct libeth_xdpsq *sq, u64 priv))
426 {
427 	struct libeth_xdpsq sq __uninitialized;
428 	u32 this, batched, off = 0;
429 	u32 ntu, i = 0;
430 
431 	n = min(n, prep(xdpsq, &sq));
432 	if (unlikely(!n))
433 		goto unlock;
434 
435 	ntu = *sq.ntu;
436 
437 	this = sq.count - ntu;
438 	if (likely(this > n))
439 		this = n;
440 
441 again:
442 	if (!unroll)
443 		goto linear;
444 
445 	batched = ALIGN_DOWN(this, LIBETH_XDP_TX_BATCH);
446 
447 	for ( ; i < off + batched; i += LIBETH_XDP_TX_BATCH) {
448 		u32 base = ntu + i - off;
449 
450 		unrolled_count(LIBETH_XDP_TX_BATCH)
451 		for (u32 j = 0; j < LIBETH_XDP_TX_BATCH; j++)
452 			xmit(fill(bulk[i + j], base + j, &sq, priv),
453 			     base + j, &sq, priv);
454 	}
455 
456 	if (batched < this) {
457 linear:
458 		for ( ; i < off + this; i++)
459 			xmit(fill(bulk[i], ntu + i - off, &sq, priv),
460 			     ntu + i - off, &sq, priv);
461 	}
462 
463 	ntu += this;
464 	if (likely(ntu < sq.count))
465 		goto out;
466 
467 	ntu = 0;
468 
469 	if (i < n) {
470 		this = n - i;
471 		off = i;
472 
473 		goto again;
474 	}
475 
476 out:
477 	*sq.ntu = ntu;
478 	*sq.pending += n;
479 	if (sq.xdp_tx)
480 		*sq.xdp_tx += n;
481 
482 unlock:
483 	libeth_xdpsq_unlock(sq.lock);
484 
485 	return n;
486 }
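
/*
 * Example (sketch; the HW descriptor layout and mydrv_* helpers are
 * hypothetical): the @xmit callback receives a ready &libeth_xdp_tx_desc and
 * only has to translate it into the hardware descriptor format:
 *
 *	static void mydrv_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i,
 *					const struct libeth_xdpsq *sq, u64 priv)
 *	{
 *		struct mydrv_tx_desc *txd = sq->descs;
 *
 *		txd[i].buf_addr = cpu_to_le64(desc.addr);
 *		txd[i].cmd_len = cpu_to_le32(desc.len |
 *					     mydrv_xdp_cmd(desc.flags));
 *	}
 */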
487 
488 /* ``XDP_TX`` bulking */
489 
490 void libeth_xdp_return_buff_slow(struct libeth_xdp_buff *xdp);
491 
492 /**
493  * libeth_xdp_tx_queue_head - internal helper for queueing one ``XDP_TX`` head
494  * @bq: XDP Tx bulk to queue the head frag to
495  * @xdp: XDP buffer with the head to queue
496  *
497  * Return: false if it's the only frag of the frame, true if it's an S/G frame.
498  */
499 static inline bool libeth_xdp_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
500 					    const struct libeth_xdp_buff *xdp)
501 {
502 	const struct xdp_buff *base = &xdp->base;
503 
504 	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
505 		.data	= xdp->data,
506 		.len_fl	= (base->data_end - xdp->data) | LIBETH_XDP_TX_FIRST,
507 		.soff	= xdp_data_hard_end(base) - xdp->data,
508 	};
509 
510 	if (!xdp_buff_has_frags(base))
511 		return false;
512 
513 	bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_MULTI;
514 
515 	return true;
516 }
517 
518 /**
519  * libeth_xdp_tx_queue_frag - internal helper for queueing one ``XDP_TX`` frag
520  * @bq: XDP Tx bulk to queue the frag to
521  * @frag: frag to queue
522  */
523 static inline void libeth_xdp_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
524 					    const skb_frag_t *frag)
525 {
526 	bq->bulk[bq->count++].frag = *frag;
527 }
528 
529 /**
530  * libeth_xdp_tx_queue_bulk - internal helper for queueing one ``XDP_TX`` frame
531  * @bq: XDP Tx bulk to queue the frame to
532  * @xdp: XDP buffer to queue
533  * @flush_bulk: driver callback to flush the bulk to the HW queue
534  *
535  * Return: true on success, false on flush error.
536  */
537 static __always_inline bool
538 libeth_xdp_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq,
539 			 struct libeth_xdp_buff *xdp,
540 			 bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
541 					    u32 flags))
542 {
543 	const struct skb_shared_info *sinfo;
544 	bool ret = true;
545 	u32 nr_frags;
546 
547 	if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
548 	    unlikely(!flush_bulk(bq, 0))) {
549 		libeth_xdp_return_buff_slow(xdp);
550 		return false;
551 	}
552 
553 	if (!libeth_xdp_tx_queue_head(bq, xdp))
554 		goto out;
555 
556 	sinfo = xdp_get_shared_info_from_buff(&xdp->base);
557 	nr_frags = sinfo->nr_frags;
558 
559 	for (u32 i = 0; i < nr_frags; i++) {
560 		if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
561 		    unlikely(!flush_bulk(bq, 0))) {
562 			ret = false;
563 			break;
564 		}
565 
566 		libeth_xdp_tx_queue_frag(bq, &sinfo->frags[i]);
567 	}
568 
569 out:
570 	bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_LAST;
571 	xdp->data = NULL;
572 
573 	return ret;
574 }
575 
576 /**
577  * libeth_xdp_tx_fill_stats - fill &libeth_sqe with ``XDP_TX`` frame stats
578  * @sqe: SQ element to fill
579  * @desc: libeth_xdp Tx descriptor
580  * @sinfo: &skb_shared_info for this frame
581  *
582  * Internal helper for filling an SQE with the frame stats; do not use it in
583  * drivers directly. Fills the number of frags and bytes for this frame.
584  */
585 #define libeth_xdp_tx_fill_stats(sqe, desc, sinfo)			      \
586 	__libeth_xdp_tx_fill_stats(sqe, desc, sinfo, __UNIQUE_ID(sqe_),	      \
587 				   __UNIQUE_ID(desc_), __UNIQUE_ID(sinfo_))
588 
589 #define __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, ue, ud, us) do {	      \
590 	const struct libeth_xdp_tx_desc *ud = (desc);			      \
591 	const struct skb_shared_info *us;				      \
592 	struct libeth_sqe *ue = (sqe);					      \
593 									      \
594 	ue->nr_frags = 1;						      \
595 	ue->bytes = ud->len;						      \
596 									      \
597 	if (ud->flags & LIBETH_XDP_TX_MULTI) {				      \
598 		us = (sinfo);						      \
599 		ue->nr_frags += us->nr_frags;				      \
600 		ue->bytes += us->xdp_frags_size;			      \
601 	}								      \
602 } while (0)
603 
604 /**
605  * libeth_xdp_tx_fill_buf - internal helper to fill one ``XDP_TX`` &libeth_sqe
606  * @frm: XDP Tx frame from the bulk
607  * @i: index on the HW queue
608  * @sq: XDPSQ abstraction for the queue
609  * @priv: private data
610  *
611  * Return: XDP Tx descriptor with the synced DMA and other info to pass to
612  * the driver callback.
613  */
614 static inline struct libeth_xdp_tx_desc
615 libeth_xdp_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
616 		       const struct libeth_xdpsq *sq, u64 priv)
617 {
618 	struct libeth_xdp_tx_desc desc;
619 	struct skb_shared_info *sinfo;
620 	skb_frag_t *frag = &frm.frag;
621 	struct libeth_sqe *sqe;
622 	netmem_ref netmem;
623 
624 	if (frm.len_fl & LIBETH_XDP_TX_FIRST) {
625 		sinfo = frm.data + frm.soff;
626 		skb_frag_fill_netmem_desc(frag, virt_to_netmem(frm.data),
627 					  offset_in_page(frm.data),
628 					  frm.len_fl);
629 	} else {
630 		sinfo = NULL;
631 	}
632 
633 	netmem = skb_frag_netmem(frag);
634 	desc = (typeof(desc)){
635 		.addr	= page_pool_get_dma_addr_netmem(netmem) +
636 			  skb_frag_off(frag),
637 		.len	= skb_frag_size(frag) & LIBETH_XDP_TX_LEN,
638 		.flags	= skb_frag_size(frag) & LIBETH_XDP_TX_FLAGS,
639 	};
640 
641 	dma_sync_single_for_device(__netmem_get_pp(netmem)->p.dev, desc.addr,
642 				   desc.len, DMA_BIDIRECTIONAL);
643 
644 	if (!sinfo)
645 		return desc;
646 
647 	sqe = &sq->sqes[i];
648 	sqe->type = LIBETH_SQE_XDP_TX;
649 	sqe->sinfo = sinfo;
650 	libeth_xdp_tx_fill_stats(sqe, &desc, sinfo);
651 
652 	return desc;
653 }
654 
655 void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent,
656 			     u32 flags);
657 
658 /**
659  * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk
660  * @bq: bulk to flush
661  * @flags: XDP TX flags (.ndo_xdp_xmit() etc.)
662  * @prep: driver-specific callback to prepare the queue for sending
663  * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc
664  * @xmit: driver callback to fill a HW descriptor
665  *
666  * Internal abstraction to create bulk flush functions for drivers.
667  *
668  * Return: true if anything was sent, false otherwise.
669  */
670 static __always_inline bool
671 __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags,
672 			   u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
673 			   struct libeth_xdp_tx_desc
674 			   (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
675 				   const struct libeth_xdpsq *sq, u64 priv),
676 			   void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
677 					const struct libeth_xdpsq *sq,
678 					u64 priv))
679 {
680 	u32 sent, drops;
681 	int err = 0;
682 
683 	sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq,
684 				       min(bq->count, LIBETH_XDP_TX_BULK),
685 				       false, 0, prep, fill, xmit);
686 	drops = bq->count - sent;
687 
688 	if (unlikely(drops)) {
689 		libeth_xdp_tx_exception(bq, sent, flags);
690 		err = -ENXIO;
691 	} else {
692 		bq->count = 0;
693 	}
694 
695 	trace_xdp_bulk_tx(bq->dev, sent, drops, err);
696 
697 	return likely(sent);
698 }
699 
700 /**
701  * libeth_xdp_tx_flush_bulk - wrapper to define flush of one ``XDP_TX`` bulk
702  * @bq: bulk to flush
703  * @flags: Tx flags, see above
704  * @prep: driver callback to prepare the queue
705  * @xmit: driver callback to fill a HW descriptor
706  */
707 #define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit)			      \
708 	__libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf,   \
709 				   xmit)
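
/*
 * Example (sketch; the @prep and @xmit implementations are hypothetical): a
 * driver-side ``XDP_TX`` bulk flush function built on the wrapper above,
 * suitable for passing as @flush_bulk to the queueing helpers:
 *
 *	static bool mydrv_xdp_flush_tx(struct libeth_xdp_tx_bulk *bq, u32 flags)
 *	{
 *		return libeth_xdp_tx_flush_bulk(bq, flags, mydrv_xdp_tx_prep,
 *						mydrv_xdp_xmit_desc);
 *	}
 */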
710 
711 /* .ndo_xdp_xmit() implementation */
712 
713 /**
714  * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame
715  * @xf: pointer to the XDP frame
716  *
717  * There's no place in &libeth_xdp_tx_frame to store the DMA address of an
718  * &xdp_frame head. The headroom is used instead: the address is placed right
719  * after the frame struct, naturally aligned.
720  *
721  * Return: pointer to the DMA address to use.
722  */
723 #define libeth_xdp_xmit_frame_dma(xf)					      \
724 	_Generic((xf),							      \
725 		 const struct xdp_frame *:				      \
726 			(const dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf),  \
727 		 struct xdp_frame *:					      \
728 			(dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf)	      \
729 	)
730 
731 static inline void *__libeth_xdp_xmit_frame_dma(const struct xdp_frame *xdpf)
732 {
733 	void *addr = (void *)(xdpf + 1);
734 
735 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
736 	    __alignof(*xdpf) < sizeof(dma_addr_t))
737 		addr = PTR_ALIGN(addr, sizeof(dma_addr_t));
738 
739 	return addr;
740 }
741 
742 /**
743  * libeth_xdp_xmit_queue_head - internal helper for queueing one XDP xmit head
744  * @bq: XDP Tx bulk to queue the head frag to
745  * @xdpf: XDP frame with the head to queue
746  * @dev: device to perform DMA mapping
747  *
748  * Return: ``LIBETH_XDP_DROP`` on DMA mapping error,
749  *	   ``LIBETH_XDP_PASS`` if it's the only frag in the frame,
750  *	   ``LIBETH_XDP_TX`` if it's an S/G frame.
751  */
752 static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq,
753 					     struct xdp_frame *xdpf,
754 					     struct device *dev)
755 {
756 	dma_addr_t dma;
757 
758 	dma = dma_map_single(dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
759 	if (dma_mapping_error(dev, dma))
760 		return LIBETH_XDP_DROP;
761 
762 	*libeth_xdp_xmit_frame_dma(xdpf) = dma;
763 
764 	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
765 		.xdpf	= xdpf,
766 		.len	= xdpf->len,
767 		.flags	= LIBETH_XDP_TX_FIRST,
768 	};
769 
770 	if (!xdp_frame_has_frags(xdpf))
771 		return LIBETH_XDP_PASS;
772 
773 	bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI;
774 
775 	return LIBETH_XDP_TX;
776 }
777 
778 /**
779  * libeth_xdp_xmit_queue_frag - internal helper for queueing one XDP xmit frag
780  * @bq: XDP Tx bulk to queue the frag to
781  * @frag: frag to queue
782  * @dev: device to perform DMA mapping
783  *
784  * Return: true on success, false on DMA mapping error.
785  */
786 static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq,
787 					      const skb_frag_t *frag,
788 					      struct device *dev)
789 {
790 	dma_addr_t dma;
791 
792 	dma = skb_frag_dma_map(dev, frag);
793 	if (dma_mapping_error(dev, dma))
794 		return false;
795 
796 	bq->bulk[bq->count++] = (typeof(*bq->bulk)){
797 		.dma	= dma,
798 		.len	= skb_frag_size(frag),
799 	};
800 
801 	return true;
802 }
803 
804 /**
805  * libeth_xdp_xmit_queue_bulk - internal helper for queueing one XDP xmit frame
806  * @bq: XDP Tx bulk to queue the frame to
807  * @xdpf: XDP frame to queue
808  * @flush_bulk: driver callback to flush the bulk to the HW queue
809  *
810  * Return: ``LIBETH_XDP_TX`` on success,
811  *	   ``LIBETH_XDP_DROP`` if the frame should be dropped by the stack,
812  *	   ``LIBETH_XDP_ABORTED`` if the frame will be dropped by libeth_xdp.
813  */
814 static __always_inline u32
815 libeth_xdp_xmit_queue_bulk(struct libeth_xdp_tx_bulk *bq,
816 			   struct xdp_frame *xdpf,
817 			   bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
818 					      u32 flags))
819 {
820 	u32 head, nr_frags, i, ret = LIBETH_XDP_TX;
821 	struct device *dev = bq->dev->dev.parent;
822 	const struct skb_shared_info *sinfo;
823 
824 	if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
825 	    unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
826 		return LIBETH_XDP_DROP;
827 
828 	head = libeth_xdp_xmit_queue_head(bq, xdpf, dev);
829 	if (head == LIBETH_XDP_PASS)
830 		goto out;
831 	else if (head == LIBETH_XDP_DROP)
832 		return LIBETH_XDP_DROP;
833 
834 	sinfo = xdp_get_shared_info_from_frame(xdpf);
835 	nr_frags = sinfo->nr_frags;
836 
837 	for (i = 0; i < nr_frags; i++) {
838 		if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
839 		    unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
840 			break;
841 
842 		if (!libeth_xdp_xmit_queue_frag(bq, &sinfo->frags[i], dev))
843 			break;
844 	}
845 
846 	if (unlikely(i < nr_frags))
847 		ret = LIBETH_XDP_ABORTED;
848 
849 out:
850 	bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST;
851 
852 	return ret;
853 }
854 
855 /**
856  * libeth_xdp_xmit_fill_buf - internal helper to fill one XDP xmit &libeth_sqe
857  * @frm: XDP Tx frame from the bulk
858  * @i: index on the HW queue
859  * @sq: XDPSQ abstraction for the queue
860  * @priv: private data
861  *
862  * Return: XDP Tx descriptor with the mapped DMA and other info to pass to
863  * the driver callback.
864  */
865 static inline struct libeth_xdp_tx_desc
866 libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
867 			 const struct libeth_xdpsq *sq, u64 priv)
868 {
869 	struct libeth_xdp_tx_desc desc;
870 	struct libeth_sqe *sqe;
871 	struct xdp_frame *xdpf;
872 
873 	if (frm.flags & LIBETH_XDP_TX_FIRST) {
874 		xdpf = frm.xdpf;
875 		desc.addr = *libeth_xdp_xmit_frame_dma(xdpf);
876 	} else {
877 		xdpf = NULL;
878 		desc.addr = frm.dma;
879 	}
880 	desc.opts = frm.opts;
881 
882 	sqe = &sq->sqes[i];
883 	dma_unmap_addr_set(sqe, dma, desc.addr);
884 	dma_unmap_len_set(sqe, len, desc.len);
885 
886 	if (!xdpf) {
887 		sqe->type = LIBETH_SQE_XDP_XMIT_FRAG;
888 		return desc;
889 	}
890 
891 	sqe->type = LIBETH_SQE_XDP_XMIT;
892 	sqe->xdpf = xdpf;
893 	libeth_xdp_tx_fill_stats(sqe, &desc,
894 				 xdp_get_shared_info_from_frame(xdpf));
895 
896 	return desc;
897 }
898 
899 /**
900  * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk
901  * @bq: bulk to flush
902  * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk()
903  * @prep: driver callback to prepare the queue
904  * @xmit: driver callback to fill a HW descriptor
905  */
906 #define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit)		      \
907 	__libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep,     \
908 				   libeth_xdp_xmit_fill_buf, xmit)
909 
910 u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq,
911 				u32 count, const struct net_device *dev);
912 
913 /**
914  * __libeth_xdp_xmit_do_bulk - internal function to implement .ndo_xdp_xmit()
915  * @bq: XDP Tx bulk to queue frames to
916  * @frames: XDP frames passed by the stack
917  * @n: number of frames
918  * @flags: flags passed by the stack
919  * @flush_bulk: driver callback to flush an XDP xmit bulk
920  * @finalize: driver callback to finalize sending XDP Tx frames on the queue
921  *
922  * Perform common checks, map the frags and queue them to the bulk, then flush
923  * the bulk to the XDPSQ. If requested by the stack, finalize the queue.
924  *
925  * Return: number of frames sent or -errno on error.
926  */
927 static __always_inline int
928 __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq,
929 			  struct xdp_frame **frames, u32 n, u32 flags,
930 			  bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
931 					     u32 flags),
932 			  void (*finalize)(void *xdpsq, bool sent, bool flush))
933 {
934 	u32 nxmit = 0;
935 
936 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
937 		return -EINVAL;
938 
939 	for (u32 i = 0; likely(i < n); i++) {
940 		u32 ret;
941 
942 		ret = libeth_xdp_xmit_queue_bulk(bq, frames[i], flush_bulk);
943 		if (unlikely(ret != LIBETH_XDP_TX)) {
944 			nxmit += ret == LIBETH_XDP_ABORTED;
945 			break;
946 		}
947 
948 		nxmit++;
949 	}
950 
951 	if (bq->count) {
952 		flush_bulk(bq, LIBETH_XDP_TX_NDO);
953 		if (unlikely(bq->count))
954 			nxmit -= libeth_xdp_xmit_return_bulk(bq->bulk,
955 							     bq->count,
956 							     bq->dev);
957 	}
958 
959 	finalize(bq->xdpsq, nxmit, flags & XDP_XMIT_FLUSH);
960 
961 	return nxmit;
962 }
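
/*
 * Example (sketch; mydrv_* names are hypothetical, the flush callback could
 * be built via libeth_xdp_xmit_flush_bulk() above): a minimal .ndo_xdp_xmit()
 * on top of this helper. The bulk header initialization (dev, xdpsq, count,
 * %NULL prog) is driver-specific:
 *
 *	static int mydrv_xdp_xmit(struct net_device *dev, int n,
 *				  struct xdp_frame **frames, u32 flags)
 *	{
 *		struct mydrv_priv *priv = netdev_priv(dev);
 *		LIBETH_XDP_ONSTACK_BULK(bq);
 *
 *		mydrv_xdp_xmit_init_bulk(&bq, priv);
 *
 *		return __libeth_xdp_xmit_do_bulk(&bq, frames, n, flags,
 *						 mydrv_xdp_flush_xmit,
 *						 mydrv_xdp_finalize_sq);
 *	}
 */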
963 
964 /* Rx polling path */
965 
966 static inline void libeth_xdp_return_va(const void *data, bool napi)
967 {
968 	netmem_ref netmem = virt_to_netmem(data);
969 
970 	page_pool_put_full_netmem(__netmem_get_pp(netmem), netmem, napi);
971 }
972 
973 static inline void libeth_xdp_return_frags(const struct skb_shared_info *sinfo,
974 					   bool napi)
975 {
976 	for (u32 i = 0; i < sinfo->nr_frags; i++) {
977 		netmem_ref netmem = skb_frag_netmem(&sinfo->frags[i]);
978 
979 		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi);
980 	}
981 }
982 
983 /**
984  * libeth_xdp_return_buff - free/recycle &libeth_xdp_buff
985  * @xdp: buffer to free
986  *
987  * Hotpath helper to free &libeth_xdp_buff. Compared to xdp_return_buff(),
988  * it's faster as it gets inlined and always assumes order-0 pages and safe
989  * direct recycling. Zeroes @xdp->data to avoid UAFs.
990  */
991 #define libeth_xdp_return_buff(xdp)	__libeth_xdp_return_buff(xdp, true)
992 
993 static inline void __libeth_xdp_return_buff(struct libeth_xdp_buff *xdp,
994 					    bool napi)
995 {
996 	if (!xdp_buff_has_frags(&xdp->base))
997 		goto out;
998 
999 	libeth_xdp_return_frags(xdp_get_shared_info_from_buff(&xdp->base),
1000 				napi);
1001 
1002 out:
1003 	libeth_xdp_return_va(xdp->data, napi);
1004 	xdp->data = NULL;
1005 }
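
/*
 * Example (sketch; the descriptor error check is hypothetical): dropping a
 * buffer from the Rx polling loop when the descriptor signals an error:
 *
 *	if (unlikely(mydrv_rx_desc_has_error(xdp->desc))) {
 *		libeth_xdp_return_buff(xdp);
 *		continue;
 *	}
 */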
1006 
1007 /* Tx buffer completion */
1008 
1009 void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo,
1010 				 struct xdp_frame_bulk *bq, bool frags);
1011 
1012 /**
1013  * __libeth_xdp_complete_tx - complete sent XDPSQE
1014  * @sqe: SQ element / Tx buffer to complete
1015  * @cp: Tx polling/completion params
1016  * @bulk: internal callback to bulk-free ``XDP_TX`` buffers
1017  *
1018  * Use the non-underscored version in drivers instead. This one is shared
1019  * internally with libeth_tx_complete_any().
1020  * Complete an XDPSQE of any type of XDP frame. This includes DMA unmapping
1021  * when needed, buffer freeing, stats update, and SQE invalidation.
1022  */
1023 static __always_inline void
1024 __libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp,
1025 			 typeof(libeth_xdp_return_buff_bulk) bulk)
1026 {
1027 	enum libeth_sqe_type type = sqe->type;
1028 
1029 	switch (type) {
1030 	case LIBETH_SQE_EMPTY:
1031 		return;
1032 	case LIBETH_SQE_XDP_XMIT:
1033 	case LIBETH_SQE_XDP_XMIT_FRAG:
1034 		dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma),
1035 			       dma_unmap_len(sqe, len), DMA_TO_DEVICE);
1036 		break;
1037 	default:
1038 		break;
1039 	}
1040 
1041 	switch (type) {
1042 	case LIBETH_SQE_XDP_TX:
1043 		bulk(sqe->sinfo, cp->bq, sqe->nr_frags != 1);
1044 		break;
1045 	case LIBETH_SQE_XDP_XMIT:
1046 		xdp_return_frame_bulk(sqe->xdpf, cp->bq);
1047 		break;
1048 	default:
1049 		break;
1050 	}
1051 
1052 	switch (type) {
1053 	case LIBETH_SQE_XDP_TX:
1054 	case LIBETH_SQE_XDP_XMIT:
1055 		cp->xdp_tx -= sqe->nr_frags;
1056 
1057 		cp->xss->packets++;
1058 		cp->xss->bytes += sqe->bytes;
1059 		break;
1060 	default:
1061 		break;
1062 	}
1063 
1064 	sqe->type = LIBETH_SQE_EMPTY;
1065 }
1066 
1067 static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe,
1068 					  struct libeth_cq_pp *cp)
1069 {
1070 	__libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk);
1071 }
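
/*
 * Example (sketch; the sq->* fields and completion index handling are
 * hypothetical): an XDPSQ completion loop feeding sent SQEs to
 * libeth_xdp_complete_tx(), with an &xdp_frame_bulk for bulk-freeing
 * .ndo_xdp_xmit() frames:
 *
 *	struct xdp_frame_bulk frame_bq;
 *	struct libeth_cq_pp cp = {
 *		.dev	= sq->dev,
 *		.bq	= &frame_bq,
 *		.xss	= &sq->xdp_stats,
 *	};
 *
 *	xdp_frame_bulk_init(&frame_bq);
 *
 *	while (sq->next_to_clean != done_idx) {
 *		libeth_xdp_complete_tx(&sq->sqes[sq->next_to_clean], &cp);
 *		sq->next_to_clean = (sq->next_to_clean + 1) % sq->count;
 *	}
 *
 *	xdp_flush_frame_bulk(&frame_bq);
 */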
1072 
1073 #endif /* __LIBETH_XDP_H */
1074