xref: /freebsd/sys/dev/iavf/iavf_txrx_iflib.c (revision 3e8eb5c7f4909209c042403ddee340b2ee7003a5)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2021, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file iavf_txrx_iflib.c
35  * @brief Tx/Rx hotpath implementation for the iflib driver
36  *
37  * Contains functions used to implement the Tx and Rx hotpaths of the iflib
38  * driver implementation.
39  */
40 #include "iavf_iflib.h"
41 #include "iavf_txrx_common.h"
42 
43 #ifdef RSS
44 #include <net/rss_config.h>
45 #endif
46 
47 /* Local Prototypes */
48 static void	iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);
49 
50 static int	iavf_isc_txd_encap(void *arg, if_pkt_info_t pi);
51 static void	iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
52 static int	iavf_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
53 static int	iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);
54 
55 static void	iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru);
56 static void	iavf_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
57 				  qidx_t pidx);
58 static int	iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
59 				      qidx_t budget);
60 static int	iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
61 
62 /**
63  * @var iavf_txrx_hwb
64  * @brief iflib Tx/Rx operations for head write back
65  *
66  * iflib ops structure for when operating the device in head write back mode.
67  */
68 struct if_txrx iavf_txrx_hwb = {
69 	iavf_isc_txd_encap,
70 	iavf_isc_txd_flush,
71 	iavf_isc_txd_credits_update_hwb,
72 	iavf_isc_rxd_available,
73 	iavf_isc_rxd_pkt_get,
74 	iavf_isc_rxd_refill,
75 	iavf_isc_rxd_flush,
76 	NULL
77 };
78 
79 /**
80  * @var iavf_txrx_dwb
81  * @brief iflib Tx/Rx operations for descriptor write back
82  *
83  * iflib ops structure for when operating the device in descriptor write back
84  * mode.
85  */
86 struct if_txrx iavf_txrx_dwb = {
87 	iavf_isc_txd_encap,
88 	iavf_isc_txd_flush,
89 	iavf_isc_txd_credits_update_dwb,
90 	iavf_isc_rxd_available,
91 	iavf_isc_rxd_pkt_get,
92 	iavf_isc_rxd_refill,
93 	iavf_isc_rxd_flush,
94 	NULL
95 };
96 
97 /**
98  * iavf_is_tx_desc_done - Check if a Tx descriptor is ready
99  * @txr: the Tx ring to check in
100  * @idx: ring index to check
101  *
102  * @returns true if the descriptor has been written back by hardware, and
103  * false otherwise.
104  */
105 static bool
106 iavf_is_tx_desc_done(struct tx_ring *txr, int idx)
107 {
108 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> IAVF_TXD_QW1_DTYPE_SHIFT)
109 	    & IAVF_TXD_QW1_DTYPE_MASK) == IAVF_TX_DESC_DTYPE_DESC_DONE);
110 }
111 
112 
113 /**
114  * iavf_tso_detect_sparse - detect TSO packets with too many segments
115  * @segs: packet segments array
116  * @nsegs: number of packet segments
117  * @pi: packet information
118  *
119  * Hardware only transmits packets with a maximum of 8 descriptors. For TSO
120  * packets, hardware needs to be able to build the split packets using 8 or
121  * fewer descriptors. Additionally, the header must be contained within at
122  * most 3 descriptors.
123  *
124  * To verify this, we walk the headers to find out how many descriptors the
125  * headers require (usually 1). Then we ensure that, for each TSO segment, its
126  * data plus the headers are contained within 8 or fewer descriptors.
127  *
128  * @returns zero if the packet is valid, one otherwise.
129  */
130 static int
131 iavf_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
132 {
133 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
134 
135 	if (nsegs <= IAVF_MAX_TX_SEGS-2)
136 		return (0);
137 	segsz = pi->ipi_tso_segsz;
138 	curseg = count = 0;
139 
140 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
141 	tsolen = pi->ipi_len - hlen;
142 
143 	i = 0;
144 	curseg = segs[0].ds_len;
145 	while (hlen > 0) {
146 		count++;
147 		if (count > IAVF_MAX_TX_SEGS - 2)
148 			return (1);
149 		if (curseg == 0) {
150 			i++;
151 			if (__predict_false(i == nsegs))
152 				return (1);
153 
154 			curseg = segs[i].ds_len;
155 		}
156 		seglen = min(curseg, hlen);
157 		curseg -= seglen;
158 		hlen -= seglen;
159 	}
160 	while (tsolen > 0) {
161 		segsz = pi->ipi_tso_segsz;
162 		while (segsz > 0 && tsolen != 0) {
163 			count++;
164 			if (count > IAVF_MAX_TX_SEGS - 2) {
165 				return (1);
166 			}
167 			if (curseg == 0) {
168 				i++;
169 				if (__predict_false(i == nsegs)) {
170 					return (1);
171 				}
172 				curseg = segs[i].ds_len;
173 			}
174 			seglen = min(curseg, segsz);
175 			segsz -= seglen;
176 			curseg -= seglen;
177 			tsolen -= seglen;
178 		}
179 		count = 0;
180 	}
181 
182 	return (0);
183 }
184 
185 /**
186  * iavf_tx_setup_offload - Setup Tx offload parameters
187  * @que: pointer to the Tx queue
188  * @pi: Tx packet info
189  * @cmd: pointer to command descriptor value
190  * @off: pointer to offset descriptor value
191  *
192  * Based on packet type and Tx offloads requested, sets up the command and
193  * offset values for a Tx descriptor to enable the requested offloads.
194  */
195 static void
196 iavf_tx_setup_offload(struct iavf_tx_queue *que __unused,
197     if_pkt_info_t pi, u32 *cmd, u32 *off)
198 {
199 	switch (pi->ipi_etype) {
200 #ifdef INET
201 		case ETHERTYPE_IP:
202 			if (pi->ipi_csum_flags & IAVF_CSUM_IPV4)
203 				*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
204 			else
205 				*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
206 			break;
207 #endif
208 #ifdef INET6
209 		case ETHERTYPE_IPV6:
210 			*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
211 			break;
212 #endif
213 		default:
214 			break;
215 	}
216 
217 	*off |= (pi->ipi_ehdrlen >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
218 	*off |= (pi->ipi_ip_hlen >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
219 
220 	switch (pi->ipi_ipproto) {
221 		case IPPROTO_TCP:
222 			if (pi->ipi_csum_flags & IAVF_CSUM_TCP) {
223 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
224 				*off |= (pi->ipi_tcp_hlen >> 2) <<
225 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
226 				/* Check for NO_HEAD MDD event */
227 				MPASS(pi->ipi_tcp_hlen != 0);
228 			}
229 			break;
230 		case IPPROTO_UDP:
231 			if (pi->ipi_csum_flags & IAVF_CSUM_UDP) {
232 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
233 				*off |= (sizeof(struct udphdr) >> 2) <<
234 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
235 			}
236 			break;
237 		case IPPROTO_SCTP:
238 			if (pi->ipi_csum_flags & IAVF_CSUM_SCTP) {
239 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
240 				*off |= (sizeof(struct sctphdr) >> 2) <<
241 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
242 			}
243 			/* Fall Thru */
244 		default:
245 			break;
246 	}
247 }
248 
249 /**
250  * iavf_tso_setup - Setup TSO context descriptor
251  * @txr: the Tx ring to process
252  * @pi: packet info structure
253  *
254  * Enable hardware segmentation offload (TSO) for a given packet by creating
255  * a context descriptor with the necessary details for offloading.
256  *
257  * @returns the new ring index to use for the data descriptor.
258  */
259 static int
260 iavf_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
261 {
262 	if_softc_ctx_t			scctx;
263 	struct iavf_tx_context_desc	*TXD;
264 	u32				cmd, mss, type, tsolen;
265 	int				idx, total_hdr_len;
266 	u64				type_cmd_tso_mss;
267 
268 	idx = pi->ipi_pidx;
269 	TXD = (struct iavf_tx_context_desc *) &txr->tx_base[idx];
270 	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
271 	tsolen = pi->ipi_len - total_hdr_len;
272 	scctx = txr->que->vsi->shared;
273 
274 	type = IAVF_TX_DESC_DTYPE_CONTEXT;
275 	cmd = IAVF_TX_CTX_DESC_TSO;
276 	/*
277 	 * TSO MSS must not be less than 64; this prevents a
278 	 * BAD_LSO_MSS MDD event when the MSS is too small.
279 	 */
280 	if (pi->ipi_tso_segsz < IAVF_MIN_TSO_MSS) {
281 		txr->mss_too_small++;
282 		pi->ipi_tso_segsz = IAVF_MIN_TSO_MSS;
283 	}
284 	mss = pi->ipi_tso_segsz;
285 
286 	/* Check for BAD_LS0_MSS MDD event (mss too large) */
287 	MPASS(mss <= IAVF_MAX_TSO_MSS);
288 	/* Check for NO_HEAD MDD event (header lengths are 0) */
289 	MPASS(pi->ipi_ehdrlen != 0);
290 	MPASS(pi->ipi_ip_hlen != 0);
291 	/* Partial check for BAD_LSO_LEN MDD event */
292 	MPASS(tsolen != 0);
293 	/* Partial check for WRONG_SIZE MDD event (during TSO) */
294 	MPASS(total_hdr_len + mss <= IAVF_MAX_FRAME);
295 
296 	type_cmd_tso_mss = ((u64)type << IAVF_TXD_CTX_QW1_DTYPE_SHIFT) |
297 	    ((u64)cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
298 	    ((u64)tsolen << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
299 	    ((u64)mss << IAVF_TXD_CTX_QW1_MSS_SHIFT);
300 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
301 
302 	TXD->tunneling_params = htole32(0);
303 	txr->que->tso++;
304 
305 	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
306 }
307 
308 #define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS)
309 
310 /**
311  * iavf_isc_txd_encap - Encapsulate a Tx packet into descriptors
312  * @arg: void pointer to the VSI structure
313  * @pi: packet info to encapsulate
314  *
315  * This routine maps the mbufs to tx descriptors, allowing the
316  * TX engine to transmit the packets.
317  *
318  * @returns 0 on success, positive on failure
319  */
320 static int
321 iavf_isc_txd_encap(void *arg, if_pkt_info_t pi)
322 {
323 	struct iavf_vsi		*vsi = arg;
324 	if_softc_ctx_t		scctx = vsi->shared;
325 	struct iavf_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
326 	struct tx_ring		*txr = &que->txr;
327 	int			nsegs = pi->ipi_nsegs;
328 	bus_dma_segment_t *segs = pi->ipi_segs;
329 	struct iavf_tx_desc	*txd = NULL;
330 	int			i, j, mask, pidx_last;
331 	u32			cmd, off, tx_intr;
332 
333 	if (__predict_false(pi->ipi_len < IAVF_MIN_FRAME)) {
334 		que->pkt_too_small++;
335 		return (EINVAL);
336 	}
337 
338 	cmd = off = 0;
339 	i = pi->ipi_pidx;
340 
341 	tx_intr = (pi->ipi_flags & IPI_TX_INTR);
342 
343 	/* Set up the TSO/CSUM offload */
344 	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
345 		/* Set up the TSO context descriptor if required */
346 		if (pi->ipi_csum_flags & CSUM_TSO) {
347 			/* Prevent MAX_BUFF MDD event (for TSO) */
348 			if (iavf_tso_detect_sparse(segs, nsegs, pi))
349 				return (EFBIG);
350 			i = iavf_tso_setup(txr, pi);
351 		}
352 		iavf_tx_setup_offload(que, pi, &cmd, &off);
353 	}
354 	if (pi->ipi_mflags & M_VLANTAG)
355 		cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
356 
357 	cmd |= IAVF_TX_DESC_CMD_ICRC;
358 	mask = scctx->isc_ntxd[0] - 1;
359 	/* Check for WRONG_SIZE MDD event */
360 	MPASS(pi->ipi_len >= IAVF_MIN_FRAME);
361 #ifdef INVARIANTS
362 	if (!(pi->ipi_csum_flags & CSUM_TSO))
363 		MPASS(pi->ipi_len <= IAVF_MAX_FRAME);
364 #endif
365 	for (j = 0; j < nsegs; j++) {
366 		bus_size_t seglen;
367 
368 		txd = &txr->tx_base[i];
369 		seglen = segs[j].ds_len;
370 
371 		/* Check for ZERO_BSIZE MDD event */
372 		MPASS(seglen != 0);
373 
374 		txd->buffer_addr = htole64(segs[j].ds_addr);
375 		txd->cmd_type_offset_bsz =
376 		    htole64(IAVF_TX_DESC_DTYPE_DATA
377 		    | ((u64)cmd  << IAVF_TXD_QW1_CMD_SHIFT)
378 		    | ((u64)off << IAVF_TXD_QW1_OFFSET_SHIFT)
379 		    | ((u64)seglen  << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
380 	            | ((u64)htole16(pi->ipi_vtag) << IAVF_TXD_QW1_L2TAG1_SHIFT));
381 
382 		txr->tx_bytes += seglen;
383 		pidx_last = i;
384 		i = (i+1) & mask;
385 	}
386 	/* Set the last descriptor for report */
387 	txd->cmd_type_offset_bsz |=
388 	    htole64(((u64)IAVF_TXD_CMD << IAVF_TXD_QW1_CMD_SHIFT));
389 	/* Add to report status array (if using TX interrupts) */
390 	if (!vsi->enable_head_writeback && tx_intr) {
391 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
392 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
393 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
394 	}
395 	pi->ipi_new_pidx = i;
396 
397 	++txr->tx_packets;
398 	return (0);
399 }
400 
401 /**
402  * iavf_isc_txd_flush - Flush Tx ring
403  * @arg: void pointer to the VSI
404  * @txqid: the Tx queue to flush
405  * @pidx: the ring index to flush to
406  *
407  * Advance the Transmit Descriptor Tail (Tdt), this tells the
408  * hardware that this frame is available to transmit.
409  */
410 static void
411 iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
412 {
413 	struct iavf_vsi *vsi = arg;
414 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
415 
416 	/* Check for ENDLESS_TX MDD event */
417 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
418 	wr32(vsi->hw, txr->tail, pidx);
419 }
420 
421 /**
422  * iavf_init_tx_ring - Initialize queue Tx ring
423  * @vsi: pointer to the VSI
424  * @que: pointer to queue to initialize
425  *
426  * (Re)Initialize a queue transmit ring by clearing its memory.
427  */
428 void
429 iavf_init_tx_ring(struct iavf_vsi *vsi, struct iavf_tx_queue *que)
430 {
431 	struct tx_ring *txr = &que->txr;
432 
433 	/* Clear the old ring contents */
434 	bzero((void *)txr->tx_base,
435 	      (sizeof(struct iavf_tx_desc)) *
436 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
437 
438 	wr32(vsi->hw, txr->tail, 0);
439 }
440 
441 /**
442  * iavf_get_tx_head - Get the index of the head of a ring
443  * @que: queue to read
444  *
445  * Retrieve the value from the location the HW records its HEAD index
446  *
447  * @returns the index of the HW head of the Tx queue
448  */
449 static inline u32
450 iavf_get_tx_head(struct iavf_tx_queue *que)
451 {
452 	if_softc_ctx_t          scctx = que->vsi->shared;
453 	struct tx_ring  *txr = &que->txr;
454 	void *head = &txr->tx_base[scctx->isc_ntxd[0]];
455 
456 	return LE32_TO_CPU(*(volatile __le32 *)head);
457 }
458 
459 /**
460  * iavf_isc_txd_credits_update_hwb - Update Tx ring credits
461  * @arg: void pointer to the VSI
462  * @qid: the queue id to update
463  * @clear: whether to update or only report current status
464  *
465  * Checks the number of packets in the queue that could be cleaned up.
466  *
467  * if clear is true, the iflib stack has cleaned the packets and is
468  * notifying the driver to update its processed ring pointer.
469  *
470  * @returns the number of packets in the ring that can be cleaned.
471  *
472  * @remark this function is intended for the head write back mode.
473  */
474 static int
475 iavf_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
476 {
477 	struct iavf_vsi          *vsi = arg;
478 	if_softc_ctx_t          scctx = vsi->shared;
479 	struct iavf_tx_queue     *que = &vsi->tx_queues[qid];
480 	struct tx_ring		*txr = &que->txr;
481 	int			 head, credits;
482 
483 	/* Get the Head WB value */
484 	head = iavf_get_tx_head(que);
485 
486 	credits = head - txr->tx_cidx_processed;
487 	if (credits < 0)
488 		credits += scctx->isc_ntxd[0];
489 	if (clear)
490 		txr->tx_cidx_processed = head;
491 
492 	return (credits);
493 }
494 
495 /**
496  * iavf_isc_txd_credits_update_dwb - Update Tx ring credits
497  * @arg: void pointer to the VSI
498  * @txqid: the queue id to update
499  * @clear: whether to update or only report current status
500  *
501  * Checks the number of packets in the queue that could be cleaned up.
502  *
503  * if clear is true, the iflib stack has cleaned the packets and is
504  * notifying the driver to update its processed ring pointer.
505  *
506  * @returns the number of packets in the ring that can be cleaned.
507  *
508  * @remark this function is intended for the descriptor write back mode.
509  */
510 static int
511 iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
512 {
513 	struct iavf_vsi *vsi = arg;
514 	struct iavf_tx_queue *tx_que = &vsi->tx_queues[txqid];
515 	if_softc_ctx_t scctx = vsi->shared;
516 	struct tx_ring *txr = &tx_que->txr;
517 
518 	qidx_t processed = 0;
519 	qidx_t cur, prev, ntxd, rs_cidx;
520 	int32_t delta;
521 	bool is_done;
522 
523 	rs_cidx = txr->tx_rs_cidx;
524 	if (rs_cidx == txr->tx_rs_pidx)
525 		return (0);
526 	cur = txr->tx_rsq[rs_cidx];
527 	MPASS(cur != QIDX_INVALID);
528 	is_done = iavf_is_tx_desc_done(txr, cur);
529 
530 	if (!is_done)
531 		return (0);
532 
533 	/* If clear is false just let caller know that there
534 	 * are descriptors to reclaim */
535 	if (!clear)
536 		return (1);
537 
538 	prev = txr->tx_cidx_processed;
539 	ntxd = scctx->isc_ntxd[0];
540 	do {
541 		MPASS(prev != cur);
542 		delta = (int32_t)cur - (int32_t)prev;
543 		if (delta < 0)
544 			delta += ntxd;
545 		MPASS(delta > 0);
546 		processed += delta;
547 		prev = cur;
548 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
549 		if (rs_cidx == txr->tx_rs_pidx)
550 			break;
551 		cur = txr->tx_rsq[rs_cidx];
552 		MPASS(cur != QIDX_INVALID);
553 		is_done = iavf_is_tx_desc_done(txr, cur);
554 	} while (is_done);
555 
556 	txr->tx_rs_cidx = rs_cidx;
557 	txr->tx_cidx_processed = prev;
558 
559 	return (processed);
560 }
561 
562 /**
563  * iavf_isc_rxd_refill - Prepare descriptors for re-use
564  * @arg: void pointer to the VSI
565  * @iru: the Rx descriptor update structure
566  *
567  * Update Rx descriptors for a given queue so that they can be re-used by
568  * hardware for future packets.
569  */
570 static void
571 iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru)
572 {
573 	struct iavf_vsi *vsi = arg;
574 	if_softc_ctx_t scctx = vsi->shared;
575 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
576 	uint64_t *paddrs;
577 	uint16_t next_pidx, pidx;
578 	uint16_t count;
579 	int i;
580 
581 	paddrs = iru->iru_paddrs;
582 	pidx = iru->iru_pidx;
583 	count = iru->iru_count;
584 
585 	for (i = 0, next_pidx = pidx; i < count; i++) {
586 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
587 		if (++next_pidx == scctx->isc_nrxd[0])
588 			next_pidx = 0;
589 	}
590 }
591 
592 /**
593  * iavf_isc_rxd_flush - Notify hardware of new Rx descriptors
594  * @arg: void pointer to the VSI
595  * @rxqid: Rx queue to update
596  * @flid: unused parameter
597  * @pidx: ring index to update to
598  *
599  * Updates the tail pointer of the Rx ring, notifying hardware of new
600  * descriptors available for receiving packets.
601  */
602 static void
603 iavf_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
604 {
605 	struct iavf_vsi		*vsi = arg;
606 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
607 
608 	wr32(vsi->hw, rxr->tail, pidx);
609 }
610 
611 /**
612  * iavf_isc_rxd_available - Calculate number of available Rx descriptors
613  * @arg: void pointer to the VSI
614  * @rxqid: Rx queue to check
615  * @idx: starting index to check from
616  * @budget: maximum Rx budget
617  *
618  * Determines how many packets are ready to be processed in the Rx queue, up
619  * to the specified budget.
620  *
621  * @returns the number of packets ready to be processed, up to the budget.
622  */
623 static int
624 iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
625 {
626 	struct iavf_vsi *vsi = arg;
627 	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
628 	union iavf_rx_desc *rxd;
629 	u64 qword;
630 	uint32_t status;
631 	int cnt, i, nrxd;
632 
633 	nrxd = vsi->shared->isc_nrxd[0];
634 
635 	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
636 		rxd = &rxr->rx_base[i];
637 		qword = le64toh(rxd->wb.qword1.status_error_len);
638 		status = (qword & IAVF_RXD_QW1_STATUS_MASK)
639 			>> IAVF_RXD_QW1_STATUS_SHIFT;
640 
641 		if ((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) == 0)
642 			break;
643 		if (++i == nrxd)
644 			i = 0;
645 		if (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT))
646 			cnt++;
647 	}
648 
649 	return (cnt);
650 }
651 
652 /**
653  * iavf_isc_rxd_pkt_get - Decapsulate packet from Rx descriptors
654  * @arg: void pointer to the VSI
655  * @ri: packet info structure
656  *
657  * Read packet data from the Rx ring descriptors and fill in the packet info
658  * structure so that the iflib stack can process the packet.
659  *
660  * @remark this routine executes in ithread context.
661  *
662  * @returns zero success, or EBADMSG if the packet is corrupted.
663  */
664 static int
665 iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
666 {
667 	struct iavf_vsi		*vsi = arg;
668 	if_softc_ctx_t		scctx = vsi->shared;
669 	struct iavf_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
670 	struct rx_ring		*rxr = &que->rxr;
671 	union iavf_rx_desc	*cur;
672 	u32		status, error;
673 	u16		plen;
674 	u64		qword;
675 	u8		ptype;
676 	bool		eop;
677 	int i, cidx;
678 
679 	cidx = ri->iri_cidx;
680 	i = 0;
681 	do {
682 		/* 5 descriptor receive limit */
683 		MPASS(i < IAVF_MAX_RX_SEGS);
684 
685 		cur = &rxr->rx_base[cidx];
686 		qword = le64toh(cur->wb.qword1.status_error_len);
687 		status = (qword & IAVF_RXD_QW1_STATUS_MASK)
688 		    >> IAVF_RXD_QW1_STATUS_SHIFT;
689 		error = (qword & IAVF_RXD_QW1_ERROR_MASK)
690 		    >> IAVF_RXD_QW1_ERROR_SHIFT;
691 		plen = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK)
692 		    >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;
693 		ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK)
694 		    >> IAVF_RXD_QW1_PTYPE_SHIFT;
695 
696 		/* we should never be called without a valid descriptor */
697 		MPASS((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) != 0);
698 
699 		ri->iri_len += plen;
700 		rxr->rx_bytes += plen;
701 
702 		cur->wb.qword1.status_error_len = 0;
703 		eop = (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT));
704 
705 		/*
706 		** Make sure bad packets are discarded,
707 		** note that only EOP descriptor has valid
708 		** error results.
709 		*/
710 		if (eop && (error & (1 << IAVF_RX_DESC_ERROR_RXE_SHIFT))) {
711 			rxr->desc_errs++;
712 			return (EBADMSG);
713 		}
714 		ri->iri_frags[i].irf_flid = 0;
715 		ri->iri_frags[i].irf_idx = cidx;
716 		ri->iri_frags[i].irf_len = plen;
717 		if (++cidx == vsi->shared->isc_nrxd[0])
718 			cidx = 0;
719 		i++;
720 	} while (!eop);
721 
722 	/* capture data for dynamic ITR adjustment */
723 	rxr->packets++;
724 	rxr->rx_packets++;
725 
726 	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
727 		iavf_rx_checksum(ri, status, error, ptype);
728 	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
729 	ri->iri_rsstype = iavf_ptype_to_hash(ptype);
730 	if (status & (1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
731 		ri->iri_vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
732 		ri->iri_flags |= M_VLANTAG;
733 	}
734 	ri->iri_nfrags = i;
735 	return (0);
736 }
737 
738 /**
739  * iavf_rx_checksum - Handle Rx hardware checksum indication
740  * @ri: Rx packet info structure
741  * @status: status from Rx descriptor
742  * @error: error from Rx descriptor
743  * @ptype: packet type
744  *
745  * Verify that the hardware indicated that the checksum is valid.
746  * Inform the stack about the status of checksum so that stack
747  * doesn't spend time verifying the checksum.
748  */
749 static void
750 iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
751 {
752 	struct iavf_rx_ptype_decoded decoded;
753 
754 	ri->iri_csum_flags = 0;
755 
756 	/* No L3 or L4 checksum was calculated */
757 	if (!(status & (1 << IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
758 		return;
759 
760 	decoded = decode_rx_desc_ptype(ptype);
761 
762 	/* IPv6 with extension headers likely have bad csum */
763 	if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
764 	    decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6) {
765 		if (status &
766 		    (1 << IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
767 			ri->iri_csum_flags = 0;
768 			return;
769 		}
770 	}
771 
772 	ri->iri_csum_flags |= CSUM_L3_CALC;
773 
774 	/* IPv4 checksum error */
775 	if (error & (1 << IAVF_RX_DESC_ERROR_IPE_SHIFT))
776 		return;
777 
778 	ri->iri_csum_flags |= CSUM_L3_VALID;
779 	ri->iri_csum_flags |= CSUM_L4_CALC;
780 
781 	/* L4 checksum error */
782 	if (error & (1 << IAVF_RX_DESC_ERROR_L4E_SHIFT))
783 		return;
784 
785 	ri->iri_csum_flags |= CSUM_L4_VALID;
786 	ri->iri_csum_data |= htons(0xffff);
787 }
788