xref: /freebsd/sys/dev/iavf/iavf_txrx_iflib.c (revision 214e3e09b3381e44bf5d9c1dcd19c4b1b923a796)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2021, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file iavf_txrx_iflib.c
34  * @brief Tx/Rx hotpath implementation for the iflib driver
35  *
36  * Contains functions used to implement the Tx and Rx hotpaths of the iflib
37  * driver implementation.
38  */
39 #include "iavf_iflib.h"
40 #include "iavf_txrx_common.h"
41 
42 #ifdef RSS
43 #include <net/rss_config.h>
44 #endif
45 
46 /* Local Prototypes */
47 static void	iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);
48 
49 static int	iavf_isc_txd_encap(void *arg, if_pkt_info_t pi);
50 static void	iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
51 static int	iavf_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
52 static int	iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);
53 
54 static void	iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru);
55 static void	iavf_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
56 				  qidx_t pidx);
57 static int	iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
58 				      qidx_t budget);
59 static int	iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
60 
61 /**
62  * @var iavf_txrx_hwb
63  * @brief iflib Tx/Rx operations for head write back
64  *
65  * iflib ops structure for when operating the device in head write back mode.
66  */
67 struct if_txrx iavf_txrx_hwb = {
68 	iavf_isc_txd_encap,
69 	iavf_isc_txd_flush,
70 	iavf_isc_txd_credits_update_hwb,
71 	iavf_isc_rxd_available,
72 	iavf_isc_rxd_pkt_get,
73 	iavf_isc_rxd_refill,
74 	iavf_isc_rxd_flush,
75 	NULL
76 };
77 
78 /**
79  * @var iavf_txrx_dwb
80  * @brief iflib Tx/Rx operations for descriptor write back
81  *
82  * iflib ops structure for when operating the device in descriptor write back
83  * mode.
84  */
85 struct if_txrx iavf_txrx_dwb = {
86 	iavf_isc_txd_encap,
87 	iavf_isc_txd_flush,
88 	iavf_isc_txd_credits_update_dwb,
89 	iavf_isc_rxd_available,
90 	iavf_isc_rxd_pkt_get,
91 	iavf_isc_rxd_refill,
92 	iavf_isc_rxd_flush,
93 	NULL
94 };
95 
96 /**
97  * iavf_is_tx_desc_done - Check if a Tx descriptor is ready
98  * @txr: the Tx ring to check in
99  * @idx: ring index to check
100  *
101  * @returns true if the descriptor has been written back by hardware, and
102  * false otherwise.
103  */
104 static bool
105 iavf_is_tx_desc_done(struct tx_ring *txr, int idx)
106 {
107 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> IAVF_TXD_QW1_DTYPE_SHIFT)
108 	    & IAVF_TXD_QW1_DTYPE_MASK) == IAVF_TX_DESC_DTYPE_DESC_DONE);
109 }
110 
111 
112 /**
113  * iavf_tso_detect_sparse - detect TSO packets with too many segments
114  * @segs: packet segments array
115  * @nsegs: number of packet segments
116  * @pi: packet information
117  *
118  * Hardware only transmits packets with a maximum of 8 descriptors. For TSO
119  * packets, hardware needs to be able to build the split packets using 8 or
120  * fewer descriptors. Additionally, the header must be contained within at
121  * most 3 descriptors.
122  *
123  * To verify this, we walk the headers to find out how many descriptors the
124  * headers require (usually 1). Then we ensure that, for each TSO segment, its
125  * data plus the headers are contained within 8 or fewer descriptors.
126  *
127  * @returns zero if the packet is valid, one otherwise.
128  */
129 static int
130 iavf_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
131 {
132 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
133 
134 	if (nsegs <= IAVF_MAX_TX_SEGS-2)
135 		return (0);
136 	segsz = pi->ipi_tso_segsz;
137 	curseg = count = 0;
138 
139 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
140 	tsolen = pi->ipi_len - hlen;
141 
142 	i = 0;
143 	curseg = segs[0].ds_len;
144 	while (hlen > 0) {
145 		count++;
146 		if (count > IAVF_MAX_TX_SEGS - 2)
147 			return (1);
148 		if (curseg == 0) {
149 			i++;
150 			if (__predict_false(i == nsegs))
151 				return (1);
152 
153 			curseg = segs[i].ds_len;
154 		}
155 		seglen = min(curseg, hlen);
156 		curseg -= seglen;
157 		hlen -= seglen;
158 	}
159 	while (tsolen > 0) {
160 		segsz = pi->ipi_tso_segsz;
161 		while (segsz > 0 && tsolen != 0) {
162 			count++;
163 			if (count > IAVF_MAX_TX_SEGS - 2) {
164 				return (1);
165 			}
166 			if (curseg == 0) {
167 				i++;
168 				if (__predict_false(i == nsegs)) {
169 					return (1);
170 				}
171 				curseg = segs[i].ds_len;
172 			}
173 			seglen = min(curseg, segsz);
174 			segsz -= seglen;
175 			curseg -= seglen;
176 			tsolen -= seglen;
177 		}
178 		count = 0;
179 	}
180 
181 	return (0);
182 }
183 
184 /**
185  * iavf_tx_setup_offload - Setup Tx offload parameters
186  * @que: pointer to the Tx queue
187  * @pi: Tx packet info
188  * @cmd: pointer to command descriptor value
189  * @off: pointer to offset descriptor value
190  *
191  * Based on packet type and Tx offloads requested, sets up the command and
192  * offset values for a Tx descriptor to enable the requested offloads.
193  */
194 static void
195 iavf_tx_setup_offload(struct iavf_tx_queue *que __unused,
196     if_pkt_info_t pi, u32 *cmd, u32 *off)
197 {
198 	switch (pi->ipi_etype) {
199 #ifdef INET
200 		case ETHERTYPE_IP:
201 			if (pi->ipi_csum_flags & IAVF_CSUM_IPV4)
202 				*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
203 			else
204 				*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
205 			break;
206 #endif
207 #ifdef INET6
208 		case ETHERTYPE_IPV6:
209 			*cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
210 			break;
211 #endif
212 		default:
213 			break;
214 	}
215 
216 	*off |= (pi->ipi_ehdrlen >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
217 	*off |= (pi->ipi_ip_hlen >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
218 
219 	switch (pi->ipi_ipproto) {
220 		case IPPROTO_TCP:
221 			if (pi->ipi_csum_flags & IAVF_CSUM_TCP) {
222 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
223 				*off |= (pi->ipi_tcp_hlen >> 2) <<
224 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
225 				/* Check for NO_HEAD MDD event */
226 				MPASS(pi->ipi_tcp_hlen != 0);
227 			}
228 			break;
229 		case IPPROTO_UDP:
230 			if (pi->ipi_csum_flags & IAVF_CSUM_UDP) {
231 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
232 				*off |= (sizeof(struct udphdr) >> 2) <<
233 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
234 			}
235 			break;
236 		case IPPROTO_SCTP:
237 			if (pi->ipi_csum_flags & IAVF_CSUM_SCTP) {
238 				*cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
239 				*off |= (sizeof(struct sctphdr) >> 2) <<
240 				    IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
241 			}
242 			/* Fall Thru */
243 		default:
244 			break;
245 	}
246 }
247 
248 /**
249  * iavf_tso_setup - Setup TSO context descriptor
250  * @txr: the Tx ring to process
251  * @pi: packet info structure
252  *
253  * Enable hardware segmentation offload (TSO) for a given packet by creating
254  * a context descriptor with the necessary details for offloading.
255  *
256  * @returns the new ring index to use for the data descriptor.
257  */
258 static int
259 iavf_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
260 {
261 	if_softc_ctx_t			scctx;
262 	struct iavf_tx_context_desc	*TXD;
263 	u32				cmd, mss, type, tsolen;
264 	int				idx, total_hdr_len;
265 	u64				type_cmd_tso_mss;
266 
267 	idx = pi->ipi_pidx;
268 	TXD = (struct iavf_tx_context_desc *) &txr->tx_base[idx];
269 	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
270 	tsolen = pi->ipi_len - total_hdr_len;
271 	scctx = txr->que->vsi->shared;
272 
273 	type = IAVF_TX_DESC_DTYPE_CONTEXT;
274 	cmd = IAVF_TX_CTX_DESC_TSO;
275 	/*
276 	 * TSO MSS must not be less than 64; this prevents a
277 	 * BAD_LSO_MSS MDD event when the MSS is too small.
278 	 */
279 	if (pi->ipi_tso_segsz < IAVF_MIN_TSO_MSS) {
280 		txr->mss_too_small++;
281 		pi->ipi_tso_segsz = IAVF_MIN_TSO_MSS;
282 	}
283 	mss = pi->ipi_tso_segsz;
284 
285 	/* Check for BAD_LS0_MSS MDD event (mss too large) */
286 	MPASS(mss <= IAVF_MAX_TSO_MSS);
287 	/* Check for NO_HEAD MDD event (header lengths are 0) */
288 	MPASS(pi->ipi_ehdrlen != 0);
289 	MPASS(pi->ipi_ip_hlen != 0);
290 	/* Partial check for BAD_LSO_LEN MDD event */
291 	MPASS(tsolen != 0);
292 	/* Partial check for WRONG_SIZE MDD event (during TSO) */
293 	MPASS(total_hdr_len + mss <= IAVF_MAX_FRAME);
294 
295 	type_cmd_tso_mss = ((u64)type << IAVF_TXD_CTX_QW1_DTYPE_SHIFT) |
296 	    ((u64)cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
297 	    ((u64)tsolen << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
298 	    ((u64)mss << IAVF_TXD_CTX_QW1_MSS_SHIFT);
299 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
300 
301 	TXD->tunneling_params = htole32(0);
302 	txr->que->tso++;
303 
304 	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
305 }
306 
307 #define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS)
308 
309 /**
310  * iavf_isc_txd_encap - Encapsulate a Tx packet into descriptors
311  * @arg: void pointer to the VSI structure
312  * @pi: packet info to encapsulate
313  *
314  * This routine maps the mbufs to tx descriptors, allowing the
315  * TX engine to transmit the packets.
316  *
317  * @returns 0 on success, positive on failure
318  */
319 static int
320 iavf_isc_txd_encap(void *arg, if_pkt_info_t pi)
321 {
322 	struct iavf_vsi		*vsi = arg;
323 	if_softc_ctx_t		scctx = vsi->shared;
324 	struct iavf_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
325 	struct tx_ring		*txr = &que->txr;
326 	int			nsegs = pi->ipi_nsegs;
327 	bus_dma_segment_t *segs = pi->ipi_segs;
328 	struct iavf_tx_desc	*txd = NULL;
329 	int			i, j, mask, pidx_last;
330 	u32			cmd, off, tx_intr;
331 
332 	if (__predict_false(pi->ipi_len < IAVF_MIN_FRAME)) {
333 		que->pkt_too_small++;
334 		return (EINVAL);
335 	}
336 
337 	cmd = off = 0;
338 	i = pi->ipi_pidx;
339 
340 	tx_intr = (pi->ipi_flags & IPI_TX_INTR);
341 
342 	/* Set up the TSO/CSUM offload */
343 	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
344 		/* Set up the TSO context descriptor if required */
345 		if (pi->ipi_csum_flags & CSUM_TSO) {
346 			/* Prevent MAX_BUFF MDD event (for TSO) */
347 			if (iavf_tso_detect_sparse(segs, nsegs, pi))
348 				return (EFBIG);
349 			i = iavf_tso_setup(txr, pi);
350 		}
351 		iavf_tx_setup_offload(que, pi, &cmd, &off);
352 	}
353 	if (pi->ipi_mflags & M_VLANTAG)
354 		cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
355 
356 	cmd |= IAVF_TX_DESC_CMD_ICRC;
357 	mask = scctx->isc_ntxd[0] - 1;
358 	/* Check for WRONG_SIZE MDD event */
359 	MPASS(pi->ipi_len >= IAVF_MIN_FRAME);
360 #ifdef INVARIANTS
361 	if (!(pi->ipi_csum_flags & CSUM_TSO))
362 		MPASS(pi->ipi_len <= IAVF_MAX_FRAME);
363 #endif
364 	for (j = 0; j < nsegs; j++) {
365 		bus_size_t seglen;
366 
367 		txd = &txr->tx_base[i];
368 		seglen = segs[j].ds_len;
369 
370 		/* Check for ZERO_BSIZE MDD event */
371 		MPASS(seglen != 0);
372 
373 		txd->buffer_addr = htole64(segs[j].ds_addr);
374 		txd->cmd_type_offset_bsz =
375 		    htole64(IAVF_TX_DESC_DTYPE_DATA
376 		    | ((u64)cmd  << IAVF_TXD_QW1_CMD_SHIFT)
377 		    | ((u64)off << IAVF_TXD_QW1_OFFSET_SHIFT)
378 		    | ((u64)seglen  << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
379 	            | ((u64)htole16(pi->ipi_vtag) << IAVF_TXD_QW1_L2TAG1_SHIFT));
380 
381 		txr->tx_bytes += seglen;
382 		pidx_last = i;
383 		i = (i+1) & mask;
384 	}
385 	/* Set the last descriptor for report */
386 	txd->cmd_type_offset_bsz |=
387 	    htole64(((u64)IAVF_TXD_CMD << IAVF_TXD_QW1_CMD_SHIFT));
388 	/* Add to report status array (if using TX interrupts) */
389 	if (!vsi->enable_head_writeback && tx_intr) {
390 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
391 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
392 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
393 	}
394 	pi->ipi_new_pidx = i;
395 
396 	++txr->tx_packets;
397 	return (0);
398 }
399 
400 /**
401  * iavf_isc_txd_flush - Flush Tx ring
402  * @arg: void pointer to the VSI
403  * @txqid: the Tx queue to flush
404  * @pidx: the ring index to flush to
405  *
406  * Advance the Transmit Descriptor Tail (Tdt), this tells the
407  * hardware that this frame is available to transmit.
408  */
409 static void
410 iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
411 {
412 	struct iavf_vsi *vsi = arg;
413 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
414 
415 	/* Check for ENDLESS_TX MDD event */
416 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
417 	wr32(vsi->hw, txr->tail, pidx);
418 }
419 
420 /**
421  * iavf_init_tx_ring - Initialize queue Tx ring
422  * @vsi: pointer to the VSI
423  * @que: pointer to queue to initialize
424  *
425  * (Re)Initialize a queue transmit ring by clearing its memory.
426  */
427 void
428 iavf_init_tx_ring(struct iavf_vsi *vsi, struct iavf_tx_queue *que)
429 {
430 	struct tx_ring *txr = &que->txr;
431 
432 	/* Clear the old ring contents */
433 	bzero((void *)txr->tx_base,
434 	      (sizeof(struct iavf_tx_desc)) *
435 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
436 
437 	wr32(vsi->hw, txr->tail, 0);
438 }
439 
440 /**
441  * iavf_get_tx_head - Get the index of the head of a ring
442  * @que: queue to read
443  *
444  * Retrieve the value from the location the HW records its HEAD index
445  *
446  * @returns the index of the HW head of the Tx queue
447  */
448 static inline u32
449 iavf_get_tx_head(struct iavf_tx_queue *que)
450 {
451 	if_softc_ctx_t          scctx = que->vsi->shared;
452 	struct tx_ring  *txr = &que->txr;
453 	void *head = &txr->tx_base[scctx->isc_ntxd[0]];
454 
455 	return LE32_TO_CPU(*(volatile __le32 *)head);
456 }
457 
458 /**
459  * iavf_isc_txd_credits_update_hwb - Update Tx ring credits
460  * @arg: void pointer to the VSI
461  * @qid: the queue id to update
462  * @clear: whether to update or only report current status
463  *
464  * Checks the number of packets in the queue that could be cleaned up.
465  *
466  * if clear is true, the iflib stack has cleaned the packets and is
467  * notifying the driver to update its processed ring pointer.
468  *
469  * @returns the number of packets in the ring that can be cleaned.
470  *
471  * @remark this function is intended for the head write back mode.
472  */
473 static int
474 iavf_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
475 {
476 	struct iavf_vsi          *vsi = arg;
477 	if_softc_ctx_t          scctx = vsi->shared;
478 	struct iavf_tx_queue     *que = &vsi->tx_queues[qid];
479 	struct tx_ring		*txr = &que->txr;
480 	int			 head, credits;
481 
482 	/* Get the Head WB value */
483 	head = iavf_get_tx_head(que);
484 
485 	credits = head - txr->tx_cidx_processed;
486 	if (credits < 0)
487 		credits += scctx->isc_ntxd[0];
488 	if (clear)
489 		txr->tx_cidx_processed = head;
490 
491 	return (credits);
492 }
493 
494 /**
495  * iavf_isc_txd_credits_update_dwb - Update Tx ring credits
496  * @arg: void pointer to the VSI
497  * @txqid: the queue id to update
498  * @clear: whether to update or only report current status
499  *
500  * Checks the number of packets in the queue that could be cleaned up.
501  *
502  * if clear is true, the iflib stack has cleaned the packets and is
503  * notifying the driver to update its processed ring pointer.
504  *
505  * @returns the number of packets in the ring that can be cleaned.
506  *
507  * @remark this function is intended for the descriptor write back mode.
508  */
509 static int
510 iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
511 {
512 	struct iavf_vsi *vsi = arg;
513 	struct iavf_tx_queue *tx_que = &vsi->tx_queues[txqid];
514 	if_softc_ctx_t scctx = vsi->shared;
515 	struct tx_ring *txr = &tx_que->txr;
516 
517 	qidx_t processed = 0;
518 	qidx_t cur, prev, ntxd, rs_cidx;
519 	int32_t delta;
520 	bool is_done;
521 
522 	rs_cidx = txr->tx_rs_cidx;
523 	if (rs_cidx == txr->tx_rs_pidx)
524 		return (0);
525 	cur = txr->tx_rsq[rs_cidx];
526 	MPASS(cur != QIDX_INVALID);
527 	is_done = iavf_is_tx_desc_done(txr, cur);
528 
529 	if (!is_done)
530 		return (0);
531 
532 	/* If clear is false just let caller know that there
533 	 * are descriptors to reclaim */
534 	if (!clear)
535 		return (1);
536 
537 	prev = txr->tx_cidx_processed;
538 	ntxd = scctx->isc_ntxd[0];
539 	do {
540 		MPASS(prev != cur);
541 		delta = (int32_t)cur - (int32_t)prev;
542 		if (delta < 0)
543 			delta += ntxd;
544 		MPASS(delta > 0);
545 		processed += delta;
546 		prev = cur;
547 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
548 		if (rs_cidx == txr->tx_rs_pidx)
549 			break;
550 		cur = txr->tx_rsq[rs_cidx];
551 		MPASS(cur != QIDX_INVALID);
552 		is_done = iavf_is_tx_desc_done(txr, cur);
553 	} while (is_done);
554 
555 	txr->tx_rs_cidx = rs_cidx;
556 	txr->tx_cidx_processed = prev;
557 
558 	return (processed);
559 }
560 
561 /**
562  * iavf_isc_rxd_refill - Prepare descriptors for re-use
563  * @arg: void pointer to the VSI
564  * @iru: the Rx descriptor update structure
565  *
566  * Update Rx descriptors for a given queue so that they can be re-used by
567  * hardware for future packets.
568  */
569 static void
570 iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru)
571 {
572 	struct iavf_vsi *vsi = arg;
573 	if_softc_ctx_t scctx = vsi->shared;
574 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
575 	uint64_t *paddrs;
576 	uint16_t next_pidx, pidx;
577 	uint16_t count;
578 	int i;
579 
580 	paddrs = iru->iru_paddrs;
581 	pidx = iru->iru_pidx;
582 	count = iru->iru_count;
583 
584 	for (i = 0, next_pidx = pidx; i < count; i++) {
585 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
586 		if (++next_pidx == scctx->isc_nrxd[0])
587 			next_pidx = 0;
588 	}
589 }
590 
591 /**
592  * iavf_isc_rxd_flush - Notify hardware of new Rx descriptors
593  * @arg: void pointer to the VSI
594  * @rxqid: Rx queue to update
595  * @flid: unused parameter
596  * @pidx: ring index to update to
597  *
598  * Updates the tail pointer of the Rx ring, notifying hardware of new
599  * descriptors available for receiving packets.
600  */
601 static void
602 iavf_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
603 {
604 	struct iavf_vsi		*vsi = arg;
605 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
606 
607 	wr32(vsi->hw, rxr->tail, pidx);
608 }
609 
610 /**
611  * iavf_isc_rxd_available - Calculate number of available Rx descriptors
612  * @arg: void pointer to the VSI
613  * @rxqid: Rx queue to check
614  * @idx: starting index to check from
615  * @budget: maximum Rx budget
616  *
617  * Determines how many packets are ready to be processed in the Rx queue, up
618  * to the specified budget.
619  *
620  * @returns the number of packets ready to be processed, up to the budget.
621  */
622 static int
623 iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
624 {
625 	struct iavf_vsi *vsi = arg;
626 	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
627 	union iavf_rx_desc *rxd;
628 	u64 qword;
629 	uint32_t status;
630 	int cnt, i, nrxd;
631 
632 	nrxd = vsi->shared->isc_nrxd[0];
633 
634 	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
635 		rxd = &rxr->rx_base[i];
636 		qword = le64toh(rxd->wb.qword1.status_error_len);
637 		status = (qword & IAVF_RXD_QW1_STATUS_MASK)
638 			>> IAVF_RXD_QW1_STATUS_SHIFT;
639 
640 		if ((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) == 0)
641 			break;
642 		if (++i == nrxd)
643 			i = 0;
644 		if (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT))
645 			cnt++;
646 	}
647 
648 	return (cnt);
649 }
650 
651 /**
652  * iavf_isc_rxd_pkt_get - Decapsulate packet from Rx descriptors
653  * @arg: void pointer to the VSI
654  * @ri: packet info structure
655  *
656  * Read packet data from the Rx ring descriptors and fill in the packet info
657  * structure so that the iflib stack can process the packet.
658  *
659  * @remark this routine executes in ithread context.
660  *
661  * @returns zero success, or EBADMSG if the packet is corrupted.
662  */
663 static int
664 iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
665 {
666 	struct iavf_vsi		*vsi = arg;
667 	if_softc_ctx_t		scctx = vsi->shared;
668 	struct iavf_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
669 	struct rx_ring		*rxr = &que->rxr;
670 	union iavf_rx_desc	*cur;
671 	u32		status, error;
672 	u16		plen;
673 	u64		qword;
674 	u8		ptype;
675 	bool		eop;
676 	int i, cidx;
677 
678 	cidx = ri->iri_cidx;
679 	i = 0;
680 	do {
681 		/* 5 descriptor receive limit */
682 		MPASS(i < IAVF_MAX_RX_SEGS);
683 
684 		cur = &rxr->rx_base[cidx];
685 		qword = le64toh(cur->wb.qword1.status_error_len);
686 		status = (qword & IAVF_RXD_QW1_STATUS_MASK)
687 		    >> IAVF_RXD_QW1_STATUS_SHIFT;
688 		error = (qword & IAVF_RXD_QW1_ERROR_MASK)
689 		    >> IAVF_RXD_QW1_ERROR_SHIFT;
690 		plen = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK)
691 		    >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;
692 		ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK)
693 		    >> IAVF_RXD_QW1_PTYPE_SHIFT;
694 
695 		/* we should never be called without a valid descriptor */
696 		MPASS((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) != 0);
697 
698 		ri->iri_len += plen;
699 		rxr->rx_bytes += plen;
700 
701 		cur->wb.qword1.status_error_len = 0;
702 		eop = (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT));
703 
704 		/*
705 		** Make sure bad packets are discarded,
706 		** note that only EOP descriptor has valid
707 		** error results.
708 		*/
709 		if (eop && (error & (1 << IAVF_RX_DESC_ERROR_RXE_SHIFT))) {
710 			rxr->desc_errs++;
711 			return (EBADMSG);
712 		}
713 		ri->iri_frags[i].irf_flid = 0;
714 		ri->iri_frags[i].irf_idx = cidx;
715 		ri->iri_frags[i].irf_len = plen;
716 		if (++cidx == vsi->shared->isc_nrxd[0])
717 			cidx = 0;
718 		i++;
719 	} while (!eop);
720 
721 	/* capture data for dynamic ITR adjustment */
722 	rxr->packets++;
723 	rxr->rx_packets++;
724 
725 	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
726 		iavf_rx_checksum(ri, status, error, ptype);
727 	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
728 	ri->iri_rsstype = iavf_ptype_to_hash(ptype);
729 	if (status & (1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
730 		ri->iri_vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
731 		ri->iri_flags |= M_VLANTAG;
732 	}
733 	ri->iri_nfrags = i;
734 	return (0);
735 }
736 
737 /**
738  * iavf_rx_checksum - Handle Rx hardware checksum indication
739  * @ri: Rx packet info structure
740  * @status: status from Rx descriptor
741  * @error: error from Rx descriptor
742  * @ptype: packet type
743  *
744  * Verify that the hardware indicated that the checksum is valid.
745  * Inform the stack about the status of checksum so that stack
746  * doesn't spend time verifying the checksum.
747  */
748 static void
749 iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
750 {
751 	struct iavf_rx_ptype_decoded decoded;
752 
753 	ri->iri_csum_flags = 0;
754 
755 	/* No L3 or L4 checksum was calculated */
756 	if (!(status & (1 << IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
757 		return;
758 
759 	decoded = decode_rx_desc_ptype(ptype);
760 
761 	/* IPv6 with extension headers likely have bad csum */
762 	if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
763 	    decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6) {
764 		if (status &
765 		    (1 << IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
766 			ri->iri_csum_flags = 0;
767 			return;
768 		}
769 	}
770 
771 	ri->iri_csum_flags |= CSUM_L3_CALC;
772 
773 	/* IPv4 checksum error */
774 	if (error & (1 << IAVF_RX_DESC_ERROR_IPE_SHIFT))
775 		return;
776 
777 	ri->iri_csum_flags |= CSUM_L3_VALID;
778 	ri->iri_csum_flags |= CSUM_L4_CALC;
779 
780 	/* L4 checksum error */
781 	if (error & (1 << IAVF_RX_DESC_ERROR_L4E_SHIFT))
782 		return;
783 
784 	ri->iri_csum_flags |= CSUM_L4_VALID;
785 	ri->iri_csum_data |= htons(0xffff);
786 }
787