/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2023 Intel Corporation */

#ifndef _IDPF_TXRX_H_
#define _IDPF_TXRX_H_

#include <linux/dim.h>

#include <net/libeth/cache.h>
#include <net/libeth/types.h>
#include <net/netdev_queues.h>
#include <net/tcp.h>
#include <net/xdp.h>

#include "idpf_lan_txrx.h"
#include "virtchnl2_lan_desc.h"

#define IDPF_LARGE_MAX_Q			256
#define IDPF_MAX_Q				16
#define IDPF_MIN_Q				2
/* Mailbox Queue */
#define IDPF_MAX_MBXQ				1

#define IDPF_MIN_TXQ_DESC			64
#define IDPF_MIN_RXQ_DESC			64
#define IDPF_MIN_TXQ_COMPLQ_DESC		256
#define IDPF_MAX_QIDS				256

/* The number of descriptors in a queue must be a multiple of 32. RX queue
 * descriptors must additionally be a multiple of IDPF_REQ_RXQ_DESC_MULTIPLE
 * so that each BufQ's share of them stays aligned to 32.
 */
#define IDPF_REQ_DESC_MULTIPLE			32
#define IDPF_REQ_RXQ_DESC_MULTIPLE (IDPF_MAX_BUFQS_PER_RXQ_GRP * 32)
#define IDPF_MIN_TX_DESC_NEEDED (MAX_SKB_FRAGS + 6)
#define IDPF_TX_WAKE_THRESH ((u16)IDPF_MIN_TX_DESC_NEEDED * 2)

#define IDPF_MAX_DESCS				8160
#define IDPF_MAX_TXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_DESC_MULTIPLE)
#define IDPF_MAX_RXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_RXQ_DESC_MULTIPLE)
#define MIN_SUPPORT_TXDID (\
	VIRTCHNL2_TXDID_FLEX_FLOW_SCHED |\
	VIRTCHNL2_TXDID_FLEX_TSO_CTX)

#define IDPF_DFLT_SINGLEQ_TX_Q_GROUPS		1
#define IDPF_DFLT_SINGLEQ_RX_Q_GROUPS		1
#define IDPF_DFLT_SINGLEQ_TXQ_PER_GROUP		4
#define IDPF_DFLT_SINGLEQ_RXQ_PER_GROUP		4

#define IDPF_COMPLQ_PER_GROUP			1
#define IDPF_SINGLE_BUFQ_PER_RXQ_GRP		1
#define IDPF_MAX_BUFQS_PER_RXQ_GRP		2
#define IDPF_BUFQ2_ENA				1
#define IDPF_NUMQ_PER_CHUNK			1

#define IDPF_DFLT_SPLITQ_TXQ_PER_GROUP		1
#define IDPF_DFLT_SPLITQ_RXQ_PER_GROUP		1

/* Default vector sharing */
#define IDPF_MBX_Q_VEC		1
#define IDPF_MIN_Q_VEC		1
#define IDPF_MIN_RDMA_VEC	2
/* Data vector for NOIRQ queues */
#define IDPF_RESERVED_VECS			1

#define IDPF_DFLT_TX_Q_DESC_COUNT		512
#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT		512
#define IDPF_DFLT_RX_Q_DESC_COUNT		512

/* IMPORTANT: We absolutely _cannot_ have more buffers in the system than a
 * given RX completion queue has descriptors. This includes _ALL_ buffer
 * queues. E.g.: If you have two buffer queues of 512 descriptors and buffers,
 * you have a total of 1024 buffers so your RX queue _must_ have at least that
 * many descriptors. This macro divides a given number of RX descriptors by
 * the number of buffer queues to calculate how many descriptors each buffer
 * queue can have without overrunning the RX queue.
 *
 * If you give hardware more buffers than completion descriptors, then if
 * hardware gets a chance to post more than a ring's worth of descriptors
 * before SW gets an interrupt and overwrites SW head, the gen bit in the
 * descriptor will be wrong. Any overwritten descriptors' buffers will be
 * gone forever and SW has no reasonable way to tell that this has happened.
 * From the SW perspective, when we finally get an interrupt, it looks like
 * we're still waiting for a descriptor to be done, stalling forever.
 */
#define IDPF_RX_BUFQ_DESC_COUNT(RXD, NUM_BUFQ)	((RXD) / (NUM_BUFQ))

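/* A minimal illustration of the rule above, expressed as a compile-time
 * check (the numbers are examples, not driver requirements): a
 * 1024-descriptor RX queue backed by the maximum of two buffer queues
 * leaves each buffer queue at most 512 descriptors.
 */
static_assert(IDPF_RX_BUFQ_DESC_COUNT(1024, IDPF_MAX_BUFQS_PER_RXQ_GRP) == 512);
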
#define IDPF_RX_BUFQ_WORKING_SET(rxq)		((rxq)->desc_count - 1)

#define IDPF_RX_BUMP_NTC(rxq, ntc)				\
do {								\
	if (unlikely(++(ntc) == (rxq)->desc_count)) {		\
		ntc = 0;					\
		idpf_queue_change(GEN_CHK, rxq);		\
	}							\
} while (0)

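/* A minimal splitq clean-loop sketch showing how the macro above cooperates
 * with the SW-tracked gen bit (desc_gen_bit() and consume() are hypothetical
 * stand-ins, not driver APIs): the caller works on a local copy of
 * next_to_clean and stops once a descriptor's gen bit no longer matches the
 * SW one, i.e. HW has not written it back yet on this pass of the ring.
 *
 *	u16 ntc = rxq->next_to_clean;
 *
 *	while (budget--) {
 *		desc = &rxq->rx[ntc];
 *		if (desc_gen_bit(desc) != idpf_queue_has(GEN_CHK, rxq))
 *			break;
 *		consume(rxq, desc);
 *		IDPF_RX_BUMP_NTC(rxq, ntc);
 *	}
 *	rxq->next_to_clean = ntc;
 */
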
#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx)			\
do {								\
	if (unlikely(++(idx) == (q)->desc_count))		\
		idx = 0;					\
} while (0)

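/* Same idea for singleq, minus the gen bit: a usage sketch with hypothetical
 * names, advancing a local ring index that wraps to 0 at desc_count.
 *
 *	u16 idx = rxq->next_to_clean;
 *
 *	while (pending--) {
 *		process(rxq, idx);
 *		IDPF_SINGLEQ_BUMP_RING_IDX(rxq, idx);
 *	}
 *	rxq->next_to_clean = idx;
 */
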
#define IDPF_RX_BUF_STRIDE			32
#define IDPF_RX_BUF_POST_STRIDE			16
#define IDPF_LOW_WATERMARK			64

#define IDPF_TX_TSO_MIN_MSS			88

/* Minimum number of descriptors between 2 descriptors with the RE bit set;
 * only relevant in flow scheduling mode
 */
#define IDPF_TX_SPLITQ_RE_MIN_GAP	64

#define IDPF_RFL_BI_GEN_M		BIT(16)
#define IDPF_RFL_BI_BUFID_M		GENMASK(15, 0)

#define IDPF_RXD_EOF_SPLITQ		VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
#define IDPF_RXD_EOF_SINGLEQ		VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M

#define IDPF_DESC_UNUSED(txq)     \
	((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
	(txq)->next_to_clean - (txq)->next_to_use - 1)

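/* Worked example (illustrative numbers): with desc_count = 512,
 * next_to_clean = 10 and next_to_use = 500, clean has wrapped behind use,
 * so the macro yields 512 + 10 - 500 - 1 = 21 unused descriptors. With
 * next_to_clean = 500 and next_to_use = 10 it yields 500 - 10 - 1 = 489.
 * The "- 1" keeps next_to_use from ever catching next_to_clean, which would
 * make a full ring indistinguishable from an empty one.
 */
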
#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq)	((txcq)->desc_count >> 1)
/* Determine the absolute number of completions pending, i.e. the number of
 * completions that are expected to arrive on the TX completion queue.
 */
#define IDPF_TX_COMPLQ_PENDING(txq)	\
	(((txq)->num_completions_pending >= (txq)->complq->num_completions ? \
	0 : U64_MAX) + \
	(txq)->num_completions_pending - (txq)->complq->num_completions)

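/* Worked example of the wrap handling above (illustrative numbers): if the
 * pending counter has wrapped around to 5 while num_completions sits just
 * below the wrap point at MAX - 2, then pending < completions, so the
 * macro adds MAX before subtracting and evaluates to MAX + 5 - (MAX - 2)
 * = 7 instead of underflowing.
 */
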
#define IDPF_TXBUF_NULL			U32_MAX

#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS)

#define IDPF_TX_FLAGS_TSO		BIT(0)
#define IDPF_TX_FLAGS_IPV4		BIT(1)
#define IDPF_TX_FLAGS_IPV6		BIT(2)
#define IDPF_TX_FLAGS_TUNNEL		BIT(3)
#define IDPF_TX_FLAGS_TSYN		BIT(4)

union idpf_tx_flex_desc {
	struct idpf_flex_tx_desc q; /* queue based scheduling */
	struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */
};

#define idpf_tx_buf libeth_sqe

/**
 * struct idpf_tx_offload_params - Offload parameters for a given packet
 * @tx_flags: Feature flags enabled for this packet
 * @hdr_offsets: Offset parameter for single queue model
 * @cd_tunneling: Type of tunneling enabled for single queue model
 * @tso_len: Total length of payload to segment
 * @mss: Segment size
 * @tso_segs: Number of segments to be sent
 * @tso_hdr_len: Length of headers to be duplicated
 * @td_cmd: Command field to be inserted into descriptor
 */
struct idpf_tx_offload_params {
	u32 tx_flags;

	u32 hdr_offsets;
	u32 cd_tunneling;

	u32 tso_len;
	u16 mss;
	u16 tso_segs;
	u16 tso_hdr_len;

	u16 td_cmd;
};

/**
 * struct idpf_tx_splitq_params
 * @dtype: General descriptor info
 * @eop_cmd: Type of EOP
 * @compl_tag: Associated tag for completion
 * @td_tag: Descriptor tunneling tag
 * @offload: Offload parameters
 * @prev_ntu: stored TxQ next_to_use in case of rollback
 * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback
 * @prev_refill_gen: stored refillq generation bit in case of packet rollback
 */
struct idpf_tx_splitq_params {
	enum idpf_tx_desc_dtype_value dtype;
	u16 eop_cmd;
	union {
		u16 compl_tag;
		u16 td_tag;
	};

	struct idpf_tx_offload_params offload;

	u16 prev_ntu;
	u16 prev_refill_ntc;
	bool prev_refill_gen;
};

enum idpf_tx_ctx_desc_eipt_offload {
	IDPF_TX_CTX_EXT_IP_NONE         = 0x0,
	IDPF_TX_CTX_EXT_IP_IPV6         = 0x1,
	IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
	IDPF_TX_CTX_EXT_IP_IPV4         = 0x3
};

#define IDPF_TX_COMPLQ_CLEAN_BUDGET	256
#define IDPF_TX_MIN_PKT_LEN		17
#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR	1
#define IDPF_TX_DESCS_PER_CACHE_LINE	(L1_CACHE_BYTES / \
					 sizeof(struct idpf_flex_tx_desc))
#define IDPF_TX_DESCS_FOR_CTX		1
/* TX descriptors needed, worst case */
#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \
			     IDPF_TX_DESCS_PER_CACHE_LINE + \
			     IDPF_TX_DESCS_FOR_SKB_DATA_PTR)

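/* Illustrative arithmetic only (both terms are config-dependent): with the
 * common MAX_SKB_FRAGS of 17, 64-byte cache lines and 16-byte flex Tx
 * descriptors, the worst case is 17 + 1 + 64 / 16 + 1 = 23 descriptors.
 */
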
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
 * In order to align with the read requests, we align the value down to the
 * nearest 4K, which represents our maximum read request size.
 */
#define IDPF_TX_MAX_READ_REQ_SIZE	SZ_4K
#define IDPF_TX_MAX_DESC_DATA		(SZ_16K - 1)
#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
	ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)

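/* Compile-time illustration of the values above: aligning 16383 down to a
 * 4K boundary yields the 12K per-descriptor data limit mentioned in
 * idpf_size_to_txd_count() below.
 */
static_assert(IDPF_TX_MAX_DESC_DATA_ALIGNED == 3 * SZ_4K);
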
#define idpf_rx_buf libeth_fqe

#define IDPF_RX_MAX_PTYPE_PROTO_IDS    32
#define IDPF_RX_MAX_PTYPE_SZ	(sizeof(struct virtchnl2_ptype) + \
				 (sizeof(u16) * IDPF_RX_MAX_PTYPE_PROTO_IDS))
#define IDPF_RX_PTYPE_HDR_SZ	sizeof(struct virtchnl2_get_ptype_info)
#define IDPF_RX_MAX_PTYPES_PER_BUF	\
	DIV_ROUND_DOWN_ULL((IDPF_CTLQ_MAX_BUF_LEN - IDPF_RX_PTYPE_HDR_SZ), \
			   IDPF_RX_MAX_PTYPE_SZ)

#define IDPF_GET_PTYPE_SIZE(p) struct_size((p), proto_id, (p)->proto_id_count)

#define IDPF_TUN_IP_GRE (\
	IDPF_PTYPE_TUNNEL_IP |\
	IDPF_PTYPE_TUNNEL_IP_GRENAT)

#define IDPF_TUN_IP_GRE_MAC (\
	IDPF_TUN_IP_GRE |\
	IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC)

#define IDPF_RX_MAX_PTYPE	1024
#define IDPF_RX_MAX_BASE_PTYPE	256
#define IDPF_INVALID_PTYPE_ID	0xFFFF

enum idpf_tunnel_state {
	IDPF_PTYPE_TUNNEL_IP                    = BIT(0),
	IDPF_PTYPE_TUNNEL_IP_GRENAT             = BIT(1),
	IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC         = BIT(2),
};

struct idpf_ptype_state {
	bool outer_ip:1;
	bool outer_frag:1;
	u8 tunnel_state:6;
};

264 
265 /**
266  * enum idpf_queue_flags_t
267  * @__IDPF_Q_GEN_CHK: Queues operating in splitq mode use a generation bit to
268  *		      identify new descriptor writebacks on the ring. HW sets
269  *		      the gen bit to 1 on the first writeback of any given
270  *		      descriptor. After the ring wraps, HW sets the gen bit of
271  *		      those descriptors to 0, and continues flipping
272  *		      0->1 or 1->0 on each ring wrap. SW maintains its own
273  *		      gen bit to know what value will indicate writebacks on
274  *		      the next pass around the ring. E.g. it is initialized
275  *		      to 1 and knows that reading a gen bit of 1 in any
276  *		      descriptor on the initial pass of the ring indicates a
277  *		      writeback. It also flips on every ring wrap.
278  * @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW
279  *			  bit and Q_RFL_GEN is the SW bit.
280  * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
281  * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
282  * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
283  * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
284  * @__IDPF_Q_PTP: indicates whether the Rx timestamping is enabled for the
285  *		  queue
286  * @__IDPF_Q_NOIRQ: queue is polling-driven and has no interrupt
287  * @__IDPF_Q_XDP: this is an XDP queue
288  * @__IDPF_Q_FLAGS_NBITS: Must be last
289  */
290 enum idpf_queue_flags_t {
291 	__IDPF_Q_GEN_CHK,
292 	__IDPF_Q_RFL_GEN_CHK,
293 	__IDPF_Q_FLOW_SCH_EN,
294 	__IDPF_Q_SW_MARKER,
295 	__IDPF_Q_CRC_EN,
296 	__IDPF_Q_HSPLIT_EN,
297 	__IDPF_Q_PTP,
298 	__IDPF_Q_NOIRQ,
299 	__IDPF_Q_XDP,
300 
301 	__IDPF_Q_FLAGS_NBITS,
302 };

#define idpf_queue_set(f, q)		__set_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_clear(f, q)		__clear_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_change(f, q)		__change_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_has(f, q)		test_bit(__IDPF_Q_##f, (q)->flags)

#define idpf_queue_has_clear(f, q)			\
	__test_and_clear_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_assign(f, q, v)			\
	__assign_bit(__IDPF_Q_##f, (q)->flags, v)

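/* Usage sketch: callers pass the flag name without the __IDPF_Q_ prefix,
 * and the double-underscore (non-atomic) bitops assume the flags are not
 * modified concurrently, e.g. (no_irq is a hypothetical local):
 *
 *	idpf_queue_set(GEN_CHK, rxq);
 *	if (idpf_queue_has(HSPLIT_EN, rxq))
 *		...
 *	idpf_queue_assign(NOIRQ, txq, no_irq);
 */
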
/**
 * struct idpf_vec_regs
 * @dyn_ctl_reg: Dynamic control interrupt register offset
 * @itrn_reg: Interrupt Throttling Rate register offset
 * @itrn_index_spacing: Register spacing between ITR registers of the same
 *			vector
 */
struct idpf_vec_regs {
	u32 dyn_ctl_reg;
	u32 itrn_reg;
	u32 itrn_index_spacing;
};

/**
 * struct idpf_intr_reg
 * @dyn_ctl: Dynamic control interrupt register
 * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable
 * @dyn_ctl_intena_msk_m: Mask for dyn_ctl interrupt enable mask
 * @dyn_ctl_itridx_s: Register bit offset for ITR index
 * @dyn_ctl_itridx_m: Mask for ITR index
 * @dyn_ctl_intrvl_s: Register bit offset for ITR interval
 * @dyn_ctl_wb_on_itr_m: Mask for WB on ITR feature
 * @dyn_ctl_sw_itridx_ena_m: Mask for SW ITR index
 * @dyn_ctl_swint_trig_m: Mask for dyn_ctl SW triggered interrupt enable
 * @rx_itr: RX ITR register
 * @tx_itr: TX ITR register
 * @icr_ena: Interrupt cause register offset
 * @icr_ena_ctlq_m: Mask for ICR
 */
struct idpf_intr_reg {
	void __iomem *dyn_ctl;
	u32 dyn_ctl_intena_m;
	u32 dyn_ctl_intena_msk_m;
	u32 dyn_ctl_itridx_s;
	u32 dyn_ctl_itridx_m;
	u32 dyn_ctl_intrvl_s;
	u32 dyn_ctl_wb_on_itr_m;
	u32 dyn_ctl_sw_itridx_ena_m;
	u32 dyn_ctl_swint_trig_m;
	void __iomem *rx_itr;
	void __iomem *tx_itr;
	void __iomem *icr_ena;
	u32 icr_ena_ctlq_m;
};

/**
 * struct idpf_q_vector
 * @vport: Vport back pointer
 * @num_rxq: Number of RX queues
 * @num_txq: Number of TX queues
 * @num_bufq: Number of buffer queues
 * @num_complq: number of completion queues
 * @rx: Array of RX queues to service
 * @tx: Array of TX queues to service
 * @bufq: Array of buffer queues to service
 * @complq: array of completion queues
 * @intr_reg: See struct idpf_intr_reg
 * @napi: napi handler
 * @total_events: Number of interrupts processed
 * @wb_on_itr: whether WB on ITR is enabled
 * @tx_dim: Data for TX net_dim algorithm
 * @tx_itr_value: TX interrupt throttling rate
 * @tx_intr_mode: Dynamic ITR or not
 * @tx_itr_idx: TX ITR index
 * @rx_dim: Data for RX net_dim algorithm
 * @rx_itr_value: RX interrupt throttling rate
 * @rx_intr_mode: Dynamic ITR or not
 * @rx_itr_idx: RX ITR index
 * @v_idx: Vector index
 */
struct idpf_q_vector {
	__cacheline_group_begin_aligned(read_mostly);
	struct idpf_vport *vport;

	u16 num_rxq;
	u16 num_txq;
	u16 num_bufq;
	u16 num_complq;
	struct idpf_rx_queue **rx;
	struct idpf_tx_queue **tx;
	struct idpf_buf_queue **bufq;
	struct idpf_compl_queue **complq;

	struct idpf_intr_reg intr_reg;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	struct napi_struct napi;
	u16 total_events;
	bool wb_on_itr;

	struct dim tx_dim;
	u16 tx_itr_value;
	bool tx_intr_mode;
	u32 tx_itr_idx;

	struct dim rx_dim;
	u16 rx_itr_value;
	bool rx_intr_mode;
	u32 rx_itr_idx;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u16 v_idx;

	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_q_vector, 120,
			    24 + sizeof(struct napi_struct) +
			    2 * sizeof(struct dim),
			    8);

struct idpf_rx_queue_stats {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t rsc_pkts;
	u64_stats_t hw_csum_err;
	u64_stats_t hsplit_pkts;
	u64_stats_t hsplit_buf_ovf;
	u64_stats_t bad_descs;
};

struct idpf_tx_queue_stats {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t lso_pkts;
	u64_stats_t linearize;
	u64_stats_t q_busy;
	u64_stats_t skb_drops;
	u64_stats_t dma_map_errs;
	u64_stats_t tstamp_skipped;
};

#define IDPF_ITR_DYNAMIC	1
#define IDPF_ITR_MAX		0x1FE0
#define IDPF_ITR_20K		0x0032
#define IDPF_ITR_GRAN_S		1	/* Assume ITR granularity is 2us */
#define IDPF_ITR_MASK		0x1FFE  /* ITR register value alignment mask */
#define ITR_REG_ALIGN(setting)	((setting) & IDPF_ITR_MASK)
#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode)
#define IDPF_ITR_TX_DEF		IDPF_ITR_20K
#define IDPF_ITR_RX_DEF		IDPF_ITR_20K
/* Index used for 'SW ITR' update in DYN_CTL register */
#define IDPF_SW_ITR_UPDATE_IDX	2
/* Index used for 'No ITR' update in DYN_CTL register */
#define IDPF_NO_ITR_UPDATE_IDX	3
#define IDPF_ITR_IDX_SPACING(spacing, dflt)	(spacing ? spacing : dflt)
#define IDPF_DIM_DEFAULT_PROFILE_IX		1

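/* Illustrative check of the alignment helper above: bit 0 is cleared and
 * the value is confined to the register field, so an odd 0x1FFF request is
 * written back as 0x1FFE.
 */
static_assert(ITR_REG_ALIGN(0x1FFF) == 0x1FFE);
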
/**
 * struct idpf_rx_queue - software structure representing a receive queue
 * @rx: universal receive descriptor array
 * @single_buf: buffer descriptor array in singleq
 * @desc_ring: virtual descriptor ring address
 * @bufq_sets: Pointer to the array of buffer queues in splitq mode
 * @napi: NAPI instance corresponding to this queue (splitq)
 * @xdp_prog: attached XDP program
 * @rx_buf: See struct &libeth_fqe
 * @pp: Page pool pointer in singleq mode
 * @tail: Tail offset. Used for both queue models, single and split.
 * @flags: See enum idpf_queue_flags_t
 * @idx: For an RX queue, index into the total RX queues across groups; used
 *	 for skb reporting.
 * @desc_count: Number of descriptors
 * @num_xdp_txq: total number of XDP Tx queues
 * @xdpsqs: shortcut for XDP Tx queues array
 * @rxdids: Supported RX descriptor ids
 * @truesize: data buffer truesize in singleq
 * @rx_ptype_lkup: LUT of Rx ptypes
 * @xdp_rxq: XDP queue info
 * @next_to_use: Next descriptor to use
 * @next_to_clean: Next descriptor to clean
 * @next_to_alloc: RX buffer to allocate at
 * @xdp: XDP buffer with the current frame
 * @cached_phc_time: Cached PHC time for the Rx queue
 * @stats_sync: See struct u64_stats_sync
 * @q_stats: See struct idpf_rx_queue_stats
 * @q_id: Queue id
 * @size: Length of descriptor ring in bytes
 * @dma: Physical address of ring
 * @q_vector: Backreference to associated vector
 * @rx_buffer_low_watermark: RX buffer low watermark
 * @rx_hbuf_size: Header buffer size
 * @rx_buf_size: Buffer size
 * @rx_max_pkt_size: RX max packet size
 */
struct idpf_rx_queue {
	__cacheline_group_begin_aligned(read_mostly);
	union {
		union virtchnl2_rx_desc *rx;
		struct virtchnl2_singleq_rx_buf_desc *single_buf;

		void *desc_ring;
	};
	union {
		struct {
			struct idpf_bufq_set *bufq_sets;
			struct napi_struct *napi;
			struct bpf_prog __rcu *xdp_prog;
		};
		struct {
			struct libeth_fqe *rx_buf;
			struct page_pool *pp;
			void __iomem *tail;
		};
	};

	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
	u16 idx;
	u16 desc_count;

	u32 num_xdp_txq;
	union {
		struct idpf_tx_queue **xdpsqs;
		struct {
			u32 rxdids;
			u32 truesize;
		};
	};
	const struct libeth_rx_pt *rx_ptype_lkup;

	struct xdp_rxq_info xdp_rxq;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32 next_to_use;
	u32 next_to_clean;
	u32 next_to_alloc;

	struct libeth_xdp_buff_stash xdp;
	u64 cached_phc_time;

	struct u64_stats_sync stats_sync;
	struct idpf_rx_queue_stats q_stats;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u32 q_id;
	u32 size;
	dma_addr_t dma;

	struct idpf_q_vector *q_vector;

	u16 rx_buffer_low_watermark;
	u16 rx_hbuf_size;
	u16 rx_buf_size;
	u16 rx_max_pkt_size;
	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_rx_queue,
			    ALIGN(64, __alignof(struct xdp_rxq_info)) +
			    sizeof(struct xdp_rxq_info),
			    96 + offsetof(struct idpf_rx_queue, q_stats) -
			    offsetofend(struct idpf_rx_queue, cached_phc_time),
			    32);

/**
 * struct idpf_tx_queue - software structure representing a transmit queue
 * @base_tx: base Tx descriptor array
 * @base_ctx: base Tx context descriptor array
 * @flex_tx: flex Tx descriptor array
 * @flex_ctx: flex Tx context descriptor array
 * @desc_ring: virtual descriptor ring address
 * @tx_buf: See struct idpf_tx_buf
 * @txq_grp: See struct idpf_txq_group
 * @complq: corresponding completion queue in XDP mode
 * @dev: Device back pointer for DMA mapping
 * @tail: Tail offset. Used for both queue models, single and split
 * @flags: See enum idpf_queue_flags_t
 * @idx: For a TX queue, index used to map between the TX queue group and the
 *	 hot path TX pointers stored in the vport. Used in both singleq and
 *	 splitq.
 * @desc_count: Number of descriptors
 * @tx_min_pkt_len: Min supported packet length
 * @thresh: XDP queue cleaning threshold
 * @netdev: &net_device corresponding to this queue
 * @next_to_use: Next descriptor to use
 * @next_to_clean: Next descriptor to clean
 * @last_re: last descriptor index at which the RE bit was set
 * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
 * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on
 *		   the TX completion queue, it can be for any TXQ associated
 *		   with that completion queue. This means we can clean up to
 *		   N TXQs during a single call to clean the completion queue.
 *		   cleaned_bytes|pkts tracks the clean stats per TXQ during
 *		   that single call to clean the completion queue. By doing so,
 *		   we can update BQL with aggregate cleaned stats for each TXQ
 *		   only once at the end of the cleaning routine.
 * @clean_budget: singleq only, queue cleaning budget
 * @cleaned_pkts: Number of packets cleaned for the case described above
 * @refillq: Pointer to refill queue
 * @pending: number of pending descriptors to send in QB
 * @xdp_tx: number of pending &xdp_buff or &xdp_frame buffers
 * @timer: timer for XDP Tx queue cleanup
 * @xdp_lock: lock for XDP Tx queues sharing
 * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP
 * @tstamp_task: Work that handles Tx timestamp read
 * @stats_sync: See struct u64_stats_sync
 * @q_stats: See struct idpf_tx_queue_stats
 * @q_id: Queue id
 * @size: Length of descriptor ring in bytes
 * @dma: Physical address of ring
 * @q_vector: Backreference to associated vector
 * @buf_pool_size: Total number of idpf_tx_buf
 */
struct idpf_tx_queue {
	__cacheline_group_begin_aligned(read_mostly);
	union {
		struct idpf_base_tx_desc *base_tx;
		struct idpf_base_tx_ctx_desc *base_ctx;
		union idpf_tx_flex_desc *flex_tx;
		union idpf_flex_tx_ctx_desc *flex_ctx;

		void *desc_ring;
	};
	struct libeth_sqe *tx_buf;
	union {
		struct idpf_txq_group *txq_grp;
		struct idpf_compl_queue *complq;
	};
	struct device *dev;
	void __iomem *tail;

	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
	u16 idx;
	u16 desc_count;

	union {
		u16 tx_min_pkt_len;
		u32 thresh;
	};

	struct net_device *netdev;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32 next_to_use;
	u32 next_to_clean;

	union {
		struct {
			u16 last_re;
			u16 tx_max_bufs;

			union {
				u32 cleaned_bytes;
				u32 clean_budget;
			};
			u16 cleaned_pkts;

			struct idpf_sw_queue *refillq;
		};
		struct {
			u32 pending;
			u32 xdp_tx;

			struct libeth_xdpsq_timer *timer;
			struct libeth_xdpsq_lock xdp_lock;
		};
	};

	struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps;
	struct work_struct *tstamp_task;

	struct u64_stats_sync stats_sync;
	struct idpf_tx_queue_stats q_stats;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u32 q_id;
	u32 size;
	dma_addr_t dma;

	struct idpf_q_vector *q_vector;
	u32 buf_pool_size;
	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
			    104 +
			    offsetof(struct idpf_tx_queue, cached_tstamp_caps) -
			    offsetofend(struct idpf_tx_queue, timer) +
			    offsetof(struct idpf_tx_queue, q_stats) -
			    offsetofend(struct idpf_tx_queue, tstamp_task),
			    32);

/**
 * struct idpf_buf_queue - software structure representing a buffer queue
 * @split_buf: buffer descriptor array
 * @hdr_buf: &libeth_fqe for header buffers
 * @hdr_pp: &page_pool for header buffers
 * @buf: &libeth_fqe for data buffers
 * @pp: &page_pool for data buffers
 * @tail: Tail offset
 * @flags: See enum idpf_queue_flags_t
 * @desc_count: Number of descriptors
 * @next_to_use: Next descriptor to use
 * @next_to_clean: Next descriptor to clean
 * @next_to_alloc: RX buffer to allocate at
 * @hdr_truesize: truesize for buffer headers
 * @truesize: truesize for data buffers
 * @q_id: Queue id
 * @size: Length of descriptor ring in bytes
 * @dma: Physical address of ring
 * @q_vector: Backreference to associated vector
 * @rx_buffer_low_watermark: RX buffer low watermark
 * @rx_hbuf_size: Header buffer size
 * @rx_buf_size: Buffer size
 */
struct idpf_buf_queue {
	__cacheline_group_begin_aligned(read_mostly);
	struct virtchnl2_splitq_rx_buf_desc *split_buf;
	struct libeth_fqe *hdr_buf;
	struct page_pool *hdr_pp;
	struct libeth_fqe *buf;
	struct page_pool *pp;
	void __iomem *tail;

	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
	u32 desc_count;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32 next_to_use;
	u32 next_to_clean;
	u32 next_to_alloc;

	u32 hdr_truesize;
	u32 truesize;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u32 q_id;
	u32 size;
	dma_addr_t dma;

	struct idpf_q_vector *q_vector;

	u16 rx_buffer_low_watermark;
	u16 rx_hbuf_size;
	u16 rx_buf_size;
	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32);

/**
 * struct idpf_compl_queue - software structure representing a completion queue
 * @comp: 8-byte completion descriptor array
 * @comp_4b: 4-byte completion descriptor array
 * @desc_ring: virtual descriptor ring address
 * @txq_grp: See struct idpf_txq_group
 * @flags: See enum idpf_queue_flags_t
 * @desc_count: Number of descriptors
 * @clean_budget: queue cleaning budget
 * @netdev: &net_device corresponding to this queue
 * @next_to_use: Next descriptor to use. Relevant in both split & single txq
 *		 and bufq.
 * @next_to_clean: Next descriptor to clean
 * @num_completions: Only relevant for TX completion queue. It tracks the
 *		     number of completions received to compare against the
 *		     number of completions pending, as accumulated by the
 *		     TX queues.
 * @q_id: Queue id
 * @size: Length of descriptor ring in bytes
 * @dma: Physical address of ring
 * @q_vector: Backreference to associated vector
 */
struct idpf_compl_queue {
	__cacheline_group_begin_aligned(read_mostly);
	union {
		struct idpf_splitq_tx_compl_desc *comp;
		struct idpf_splitq_4b_tx_compl_desc *comp_4b;

		void *desc_ring;
	};
	struct idpf_txq_group *txq_grp;

	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
	u32 desc_count;

	u32 clean_budget;
	struct net_device *netdev;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32 next_to_use;
	u32 next_to_clean;

	aligned_u64 num_completions;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u32 q_id;
	u32 size;
	dma_addr_t dma;

	struct idpf_q_vector *q_vector;
	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_compl_queue, 40, 16, 24);

/**
 * struct idpf_sw_queue
 * @ring: Pointer to the ring
 * @flags: See enum idpf_queue_flags_t
 * @desc_count: Descriptor count
 * @next_to_use: Buffer to allocate at
 * @next_to_clean: Next descriptor to clean
 *
 * Software queues are used in splitq mode to manage buffers between the rxq
 * producer and the bufq consumer.  These are required in order to maintain a
 * lockless buffer management system and are strictly software-only constructs.
 */
struct idpf_sw_queue {
	__cacheline_group_begin_aligned(read_mostly);
	u32 *ring;

	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
	u32 desc_count;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32 next_to_use;
	u32 next_to_clean;
	__cacheline_group_end_aligned(read_write);
};
libeth_cacheline_group_assert(struct idpf_sw_queue, read_mostly, 24);
libeth_cacheline_group_assert(struct idpf_sw_queue, read_write, 8);
libeth_cacheline_struct_assert(struct idpf_sw_queue, 24, 8);

841 
842 /**
843  * struct idpf_rxq_set
844  * @rxq: RX queue
845  * @refillq: pointers to refill queues
846  *
847  * Splitq only.  idpf_rxq_set associates an rxq with at an array of refillqs.
848  * Each rxq needs a refillq to return used buffers back to the respective bufq.
849  * Bufqs then clean these refillqs for buffers to give to hardware.
850  */
851 struct idpf_rxq_set {
852 	struct idpf_rx_queue rxq;
853 	struct idpf_sw_queue *refillq[IDPF_MAX_BUFQS_PER_RXQ_GRP];
854 };
855 
/**
 * struct idpf_bufq_set
 * @bufq: Buffer queue
 * @num_refillqs: Number of refill queues. This is always equal to num_rxq_sets
 *		  in idpf_rxq_group.
 * @refillqs: Pointer to refill queues array.
 *
 * Splitq only. idpf_bufq_set associates a bufq to an array of refillqs.
 * In this bufq_set, there will be one refillq for each rxq in this rxq_group.
 * Used buffers received by rxqs will be put on refillqs which bufqs will
 * clean to return new buffers back to hardware.
 *
 * Buffers needed by some number of rxqs associated in this rxq_group are
 * managed by at most two bufqs (depending on performance configuration).
 */
struct idpf_bufq_set {
	struct idpf_buf_queue bufq;
	int num_refillqs;
	struct idpf_sw_queue *refillqs;
};

/**
 * struct idpf_rxq_group
 * @vport: Vport back pointer
 * @singleq: Struct with single queue related members
 * @singleq.num_rxq: Number of RX queues associated
 * @singleq.rxqs: Array of RX queue pointers
 * @splitq: Struct with split queue related members
 * @splitq.num_rxq_sets: Number of RX queue sets
 * @splitq.rxq_sets: Array of RX queue sets
 * @splitq.bufq_sets: Buffer queue set pointer
 *
 * In singleq mode, an rxq_group is simply an array of rxqs.  In splitq, a
 * rxq_group contains all the rxqs, bufqs and refillqs needed to
 * manage buffers in splitq mode.
 */
struct idpf_rxq_group {
	struct idpf_vport *vport;

	union {
		struct {
			u16 num_rxq;
			struct idpf_rx_queue *rxqs[IDPF_LARGE_MAX_Q];
		} singleq;
		struct {
			u16 num_rxq_sets;
			struct idpf_rxq_set *rxq_sets[IDPF_LARGE_MAX_Q];
			struct idpf_bufq_set *bufq_sets;
		} splitq;
	};
};

/**
 * struct idpf_txq_group
 * @vport: Vport back pointer
 * @num_txq: Number of TX queues associated
 * @txqs: Array of TX queue pointers
 * @complq: Associated completion queue pointer, split queue only
 * @num_completions_pending: Total number of completions pending for the
 *			     completion queue, accumulated for all TX queues
 *			     associated with that completion queue.
 *
 * Between singleq and splitq, a txq_group is largely the same except for the
 * complq. In splitq a single complq is responsible for handling completions
 * for some number of txqs associated in this txq_group.
 */
struct idpf_txq_group {
	struct idpf_vport *vport;

	u16 num_txq;
	struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q];

	struct idpf_compl_queue *complq;

	aligned_u64 num_completions_pending;
};

/**
 * idpf_q_vector_to_mem - get the preferred memory node for a queue vector
 * @q_vector: vector to look up; may be NULL
 *
 * Return: the memory node of the first CPU in the vector's affinity mask, or
 * NUMA_NO_NODE when there is no vector or no valid CPU in the mask.
 */
static inline int idpf_q_vector_to_mem(const struct idpf_q_vector *q_vector)
{
	u32 cpu;

	if (!q_vector)
		return NUMA_NO_NODE;

	cpu = cpumask_first(&q_vector->napi.config->affinity_mask);

	return cpu < nr_cpu_ids ? cpu_to_mem(cpu) : NUMA_NO_NODE;
}

/**
 * idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
 * @size: transmit request size in bytes
 *
 * In the case where a large frag (>= 16K) needs to be split across multiple
 * descriptors, we need to assume that we can have no more than 12K of data
 * per descriptor due to hardware alignment restrictions (4K alignment).
 */
static inline u32 idpf_size_to_txd_count(unsigned int size)
{
	return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED);
}

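/* For example, a 32K fragment spans DIV_ROUND_UP(32768, 12288) = 3
 * descriptors under the 12K-per-descriptor limit described above.
 */
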
/**
 * idpf_tx_singleq_build_ctob - populate command tag offset and size
 * @td_cmd: Command to be filled in desc
 * @td_offset: Offset to be filled in desc
 * @size: Size of the buffer
 * @td_tag: td tag to be filled
 *
 * Returns the 64-bit value populated with the input parameters.
 */
static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset,
						unsigned int size, u64 td_tag)
{
	return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA |
			   (td_cmd << IDPF_TXD_QW1_CMD_S) |
			   (td_offset << IDPF_TXD_QW1_OFFSET_S) |
			   ((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) |
			   (td_tag << IDPF_TXD_QW1_L2TAG1_S));
}

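/* Illustrative call only (field and variable names are hypothetical): a
 * singleq xmit path would stamp the descriptor's second quad word directly,
 * e.g.
 *
 *	desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, td_offset,
 *					       skb_headlen(skb), td_tag);
 */
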
void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
			      struct idpf_tx_splitq_params *params,
			      u16 td_cmd, u16 size);
void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
				    struct idpf_tx_splitq_params *params,
				    u16 td_cmd, u16 size);
/**
 * idpf_tx_splitq_build_desc - determine which type of data descriptor to build
 * @desc: descriptor to populate
 * @params: pointer to tx params struct
 * @td_cmd: command to be filled in desc
 * @size: size of buffer
 */
static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc,
					     struct idpf_tx_splitq_params *params,
					     u16 td_cmd, u16 size)
{
	if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2)
		idpf_tx_splitq_build_ctb(desc, params, td_cmd, size);
	else
		idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size);
}

999 
1000 /**
1001  * idpf_vport_intr_set_wb_on_itr - enable descriptor writeback on disabled interrupts
1002  * @q_vector: pointer to queue vector struct
1003  */
1004 static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector)
1005 {
1006 	struct idpf_intr_reg *reg;
1007 
1008 	if (q_vector->wb_on_itr)
1009 		return;
1010 
1011 	q_vector->wb_on_itr = true;
1012 	reg = &q_vector->intr_reg;
1013 
1014 	writel(reg->dyn_ctl_wb_on_itr_m | reg->dyn_ctl_intena_msk_m |
1015 	       (IDPF_NO_ITR_UPDATE_IDX << reg->dyn_ctl_itridx_s),
1016 	       reg->dyn_ctl);
1017 }
1018 
1019 /**
1020  * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq
1021  * @refillq: pointer to refillq containing buf_ids
1022  */
1023 static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq)
1024 {
1025 	return (refillq->next_to_use > refillq->next_to_clean ?
1026 		0 : refillq->desc_count) +
1027 	       refillq->next_to_use - refillq->next_to_clean - 1;
1028 }
1029 
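/* Same ring-occupancy arithmetic as IDPF_DESC_UNUSED() above, e.g. with
 * desc_count = 256, next_to_use = 5 and next_to_clean = 10 it returns
 * 256 + 5 - 10 - 1 = 250 free buf_ids.
 */
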
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget);
void idpf_vport_init_num_qs(struct idpf_vport *vport,
			    struct virtchnl2_create_vport *vport_msg);
void idpf_vport_calc_num_q_desc(struct idpf_vport *vport);
int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_index,
			     struct virtchnl2_create_vport *vport_msg,
			     struct idpf_vport_max_q *max_q);
void idpf_vport_calc_num_q_groups(struct idpf_vport *vport);
int idpf_vport_queues_alloc(struct idpf_vport *vport);
void idpf_vport_queues_rel(struct idpf_vport *vport);
void idpf_vport_intr_rel(struct idpf_vport *vport);
int idpf_vport_intr_alloc(struct idpf_vport *vport);
void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
void idpf_vport_intr_deinit(struct idpf_vport *vport);
int idpf_vport_intr_init(struct idpf_vport *vport);
void idpf_vport_intr_ena(struct idpf_vport *vport);
int idpf_config_rss(struct idpf_vport *vport);
int idpf_init_rss(struct idpf_vport *vport);
void idpf_deinit_rss(struct idpf_vport *vport);
int idpf_rx_bufs_init_all(struct idpf_vport *vport);
void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
			   bool xmit_more);
netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb);
unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq,
					struct sk_buff *skb, u32 *buf_count);
void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
				  struct idpf_tx_queue *tx_q);
netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev);
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
				      u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);

void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq);

#endif /* !_IDPF_TXRX_H_ */
1067