xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision 405c3050f102b8c74782f0366c8ead927bd07b68)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2018, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
/*
**	IXL driver TX/RX Routines:
**	    This was separated to allow usage by
**	    both the PF and VF drivers.
*/
40 
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46 
47 #include "ixl.h"
48 
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
53 /* Local Prototypes */
54 static void	ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);
55 
56 static int	ixl_isc_txd_encap(void *arg, if_pkt_info_t pi);
57 static void	ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
58 static int	ixl_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
59 static int	ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);
60 
61 static void	ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru);
62 static void	ixl_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
63 				  qidx_t pidx);
64 static int	ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
65 				      qidx_t budget);
66 static int	ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
67 
/*
 * iflib TX/RX method table used when TX completion is detected via the
 * head write-back (HWB) mechanism; see ixl_isc_txd_credits_update_hwb().
 * Entry roles follow the function names; ordering must match struct if_txrx.
 */
struct if_txrx ixl_txrx_hwb = {
	ixl_isc_txd_encap,		/* encap packet into TX descriptors */
	ixl_isc_txd_flush,		/* bump TX tail register */
	ixl_isc_txd_credits_update_hwb,	/* reclaim TX via head write-back */
	ixl_isc_rxd_available,		/* count completed RX descriptors */
	ixl_isc_rxd_pkt_get,		/* hand received packet to iflib */
	ixl_isc_rxd_refill,		/* post fresh RX buffer addresses */
	ixl_isc_rxd_flush,		/* bump RX tail register */
	NULL				/* final if_txrx member unused here */
};
78 
/*
 * iflib TX/RX method table used when TX completion is detected via
 * descriptor write-back (DWB); see ixl_isc_txd_credits_update_dwb().
 * Identical to ixl_txrx_hwb except for the credits-update method.
 */
struct if_txrx ixl_txrx_dwb = {
	ixl_isc_txd_encap,		/* encap packet into TX descriptors */
	ixl_isc_txd_flush,		/* bump TX tail register */
	ixl_isc_txd_credits_update_dwb,	/* reclaim TX via descriptor DD bits */
	ixl_isc_rxd_available,		/* count completed RX descriptors */
	ixl_isc_rxd_pkt_get,		/* hand received packet to iflib */
	ixl_isc_rxd_refill,		/* post fresh RX buffer addresses */
	ixl_isc_rxd_flush,		/* bump RX tail register */
	NULL				/* final if_txrx member unused here */
};
89 
90 /*
91  * @key key is saved into this parameter
92  */
93 void
94 ixl_get_default_rss_key(u32 *key)
95 {
96 	MPASS(key != NULL);
97 
98 	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
99 	    0x183cfd8c, 0xce880440, 0x580cbc3c,
100 	    0x35897377, 0x328b25e1, 0x4fa98922,
101 	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
102 	    0x0, 0x0, 0x0};
103 
104 	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
105 }
106 
107 /**
108  * i40e_vc_stat_str - convert virtchnl status err code to a string
109  * @hw: pointer to the HW structure
110  * @stat_err: the status error code to convert
111  **/
112 const char *
113 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
114 {
115 	switch (stat_err) {
116 	case VIRTCHNL_STATUS_SUCCESS:
117 		return "OK";
118 	case VIRTCHNL_ERR_PARAM:
119 		return "VIRTCHNL_ERR_PARAM";
120 	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
121 		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
122 	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
123 		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
124 	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
125 		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
126 	case VIRTCHNL_STATUS_NOT_SUPPORTED:
127 		return "VIRTCHNL_STATUS_NOT_SUPPORTED";
128 	}
129 
130 	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
131 	return hw->err_str;
132 }
133 
134 void
135 ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...)
136 {
137 	va_list args;
138 
139 	if (!(mask & enabled_mask))
140 		return;
141 
142 	/* Re-implement device_printf() */
143 	device_print_prettyname(dev);
144 	va_start(args, fmt);
145 	vprintf(fmt, args);
146 	va_end(args);
147 }
148 
149 static bool
150 ixl_is_tx_desc_done(struct tx_ring *txr, int idx)
151 {
152 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> I40E_TXD_QW1_DTYPE_SHIFT)
153 	    & I40E_TXD_QW1_DTYPE_MASK) == I40E_TX_DESC_DTYPE_DESC_DONE);
154 }
155 
156 static int
157 ixl_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
158 {
159 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
160 
161 	if (nsegs <= IXL_MAX_TX_SEGS-2)
162 		return (0);
163 	segsz = pi->ipi_tso_segsz;
164 	curseg = count = 0;
165 
166 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
167 	tsolen = pi->ipi_len - hlen;
168 
169 	i = 0;
170 	curseg = segs[0].ds_len;
171 	while (hlen > 0) {
172 		count++;
173 		if (count > IXL_MAX_TX_SEGS - 2)
174 			return (1);
175 		if (curseg == 0) {
176 			i++;
177 			if (__predict_false(i == nsegs))
178 				return (1);
179 
180 			curseg = segs[i].ds_len;
181 		}
182 		seglen = min(curseg, hlen);
183 		curseg -= seglen;
184 		hlen -= seglen;
185 		// printf("H:seglen = %d, count=%d\n", seglen, count);
186 	}
187 	while (tsolen > 0) {
188 		segsz = pi->ipi_tso_segsz;
189 		while (segsz > 0 && tsolen != 0) {
190 			count++;
191 			if (count > IXL_MAX_TX_SEGS - 2) {
192 				// printf("bad: count = %d\n", count);
193 				return (1);
194 			}
195 			if (curseg == 0) {
196 				i++;
197 				if (__predict_false(i == nsegs)) {
198 					// printf("bad: tsolen = %d", tsolen);
199 					return (1);
200 				}
201 				curseg = segs[i].ds_len;
202 			}
203 			seglen = min(curseg, segsz);
204 			segsz -= seglen;
205 			curseg -= seglen;
206 			tsolen -= seglen;
207 			// printf("D:seglen = %d, count=%d\n", seglen, count);
208 		}
209 		count = 0;
210 	}
211 
212  	return (0);
213 }
214 
215 /*********************************************************************
216  *
217  *  Setup descriptor for hw offloads
218  *
219  **********************************************************************/
220 
221 static void
222 ixl_tx_setup_offload(struct ixl_tx_queue *que,
223     if_pkt_info_t pi, u32 *cmd, u32 *off)
224 {
225 	switch (pi->ipi_etype) {
226 #ifdef INET
227 		case ETHERTYPE_IP:
228 			if (pi->ipi_csum_flags & IXL_CSUM_IPV4)
229 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
230 			else
231 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
232 			break;
233 #endif
234 #ifdef INET6
235 		case ETHERTYPE_IPV6:
236 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
237 			break;
238 #endif
239 		default:
240 			break;
241 	}
242 
243 	*off |= (pi->ipi_ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
244 	*off |= (pi->ipi_ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
245 
246 	switch (pi->ipi_ipproto) {
247 		case IPPROTO_TCP:
248 			if (pi->ipi_csum_flags & IXL_CSUM_TCP) {
249 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
250 				*off |= (pi->ipi_tcp_hlen >> 2) <<
251 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
252 				/* Check for NO_HEAD MDD event */
253 				MPASS(pi->ipi_tcp_hlen != 0);
254 			}
255 			break;
256 		case IPPROTO_UDP:
257 			if (pi->ipi_csum_flags & IXL_CSUM_UDP) {
258 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
259 				*off |= (sizeof(struct udphdr) >> 2) <<
260 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
261 			}
262 			break;
263 		case IPPROTO_SCTP:
264 			if (pi->ipi_csum_flags & IXL_CSUM_SCTP) {
265 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
266 				*off |= (sizeof(struct sctphdr) >> 2) <<
267 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
268 			}
269 			/* Fall Thru */
270 		default:
271 			break;
272 	}
273 }
274 
/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 *  Writes a TSO context descriptor at pi->ipi_pidx and returns the
 *  ring index of the following descriptor, where the caller places
 *  the first data descriptor.
 *
 **********************************************************************/
static int
ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
{
	if_softc_ctx_t			scctx;
	struct i40e_tx_context_desc	*TXD;
	u32				cmd, mss, type, tsolen;
	int				idx, total_hdr_len;
	u64				type_cmd_tso_mss;

	/* The context descriptor occupies the slot at ipi_pidx. */
	idx = pi->ipi_pidx;
	TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
	/* TSO length counts payload only; headers are replicated per frame. */
	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	tsolen = pi->ipi_len - total_hdr_len;
	scctx = txr->que->vsi->shared;

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/*
	 * TSO MSS must not be less than 64; this prevents a
	 * BAD_LSO_MSS MDD event when the MSS is too small.
	 */
	if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
		txr->mss_too_small++;
		pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = pi->ipi_tso_segsz;

	/* Check for BAD_LS0_MSS MDD event (mss too large) */
	MPASS(mss <= IXL_MAX_TSO_MSS);
	/* Check for NO_HEAD MDD event (header lengths are 0) */
	MPASS(pi->ipi_ehdrlen != 0);
	MPASS(pi->ipi_ip_hlen != 0);
	/* Partial check for BAD_LSO_LEN MDD event */
	MPASS(tsolen != 0);
	/* Partial check for WRONG_SIZE MDD event (during TSO) */
	MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);

	/* Pack dtype, command, TSO length and MSS into qword 1. */
	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	txr->que->tso++;

	/* Index of the slot after the context descriptor, wrapping the ring. */
	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
}
328 
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
/*
 * Command bits set on the final descriptor of every packet: EOP marks
 * end-of-packet, RS requests completion status write-back.
 */
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct ixl_vsi		*vsi = arg;
	if_softc_ctx_t		scctx = vsi->shared;
	struct ixl_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
	struct tx_ring		*txr = &que->txr;
	int			nsegs = pi->ipi_nsegs;
	bus_dma_segment_t *segs = pi->ipi_segs;
	struct i40e_tx_desc	*txd = NULL;
	int             	i, j, mask, pidx_last;
	u32			cmd, off, tx_intr;

	cmd = off = 0;
	i = pi->ipi_pidx;

	tx_intr = (pi->ipi_flags & IPI_TX_INTR);

	/* Set up the TSO/CSUM offload */
	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
		/* Set up the TSO context descriptor if required */
		if (pi->ipi_csum_flags & CSUM_TSO) {
			/* Prevent MAX_BUFF MDD event (for TSO) */
			if (ixl_tso_detect_sparse(segs, nsegs, pi))
				return (EFBIG);
			/* Context descriptor consumes one slot; i advances. */
			i = ixl_tso_setup(txr, pi);
		}
		ixl_tx_setup_offload(que, pi, &cmd, &off);
	}
	if (pi->ipi_mflags & M_VLANTAG)
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Ring size is a power of two, so wrap with a mask. */
	mask = scctx->isc_ntxd[0] - 1;
	/* Check for WRONG_SIZE MDD event */
	MPASS(pi->ipi_len >= IXL_MIN_FRAME);
#ifdef INVARIANTS
	if (!(pi->ipi_csum_flags & CSUM_TSO))
		MPASS(pi->ipi_len <= IXL_MAX_FRAME);
#endif
	/* One data descriptor per DMA segment. */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;

		/* Check for ZERO_BSIZE MDD event */
		MPASS(seglen != 0);

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
	            | ((u64)htole16(pi->ipi_vtag) << I40E_TXD_QW1_L2TAG1_SHIFT));

		txr->tx_bytes += seglen;
		pidx_last = i;
		i = (i+1) & mask;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	/* Add to report status array (if using TX interrupts) */
	if (!vsi->enable_head_writeback && tx_intr) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
		/* rsq overflow would mean iflib gave us too much work */
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	pi->ipi_new_pidx = i;

	++txr->tx_packets;
	return (0);
}
413 
414 static void
415 ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
416 {
417 	struct ixl_vsi *vsi = arg;
418 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
419 
420  	/*
421 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
422 	 * hardware that this frame is available to transmit.
423  	 */
424 	/* Check for ENDLESS_TX MDD event */
425 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
426 	wr32(vsi->hw, txr->tail, pidx);
427 }
428 
429 
430 /*********************************************************************
431  *
432  *  (Re)Initialize a queue transmit ring by clearing its memory.
433  *
434  **********************************************************************/
435 void
436 ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que)
437 {
438 	struct tx_ring *txr = &que->txr;
439 
440 	/* Clear the old ring contents */
441 	bzero((void *)txr->tx_base,
442 	      (sizeof(struct i40e_tx_desc)) *
443 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
444 
445 	wr32(vsi->hw, txr->tail, 0);
446 }
447 
/*
 * ixl_get_tx_head - Retrieve the value from the
 *    location the HW records its HEAD index
 *
 * In head write-back mode the hardware DMAs its consumer index into
 * the 32-bit slot immediately after the last TX descriptor; read it
 * volatile since the hardware updates it asynchronously.
 */
static inline u32
ixl_get_tx_head(struct ixl_tx_queue *que)
{
	if_softc_ctx_t          scctx = que->vsi->shared;
	struct tx_ring  *txr = &que->txr;
	/* Write-back slot lives one entry past the end of the ring. */
	void *head = &txr->tx_base[scctx->isc_ntxd[0]];

	return LE32_TO_CPU(*(volatile __le32 *)head);
}
461 
462 static int
463 ixl_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
464 {
465 	struct ixl_vsi          *vsi = arg;
466 	if_softc_ctx_t          scctx = vsi->shared;
467 	struct ixl_tx_queue     *que = &vsi->tx_queues[qid];
468 	struct tx_ring		*txr = &que->txr;
469 	int			 head, credits;
470 
471 	/* Get the Head WB value */
472 	head = ixl_get_tx_head(que);
473 
474 	credits = head - txr->tx_cidx_processed;
475 	if (credits < 0)
476 		credits += scctx->isc_ntxd[0];
477 	if (clear)
478 		txr->tx_cidx_processed = head;
479 
480 	return (credits);
481 }
482 
483 static int
484 ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
485 {
486 	struct ixl_vsi *vsi = arg;
487 	struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid];
488 	if_softc_ctx_t scctx = vsi->shared;
489 	struct tx_ring *txr = &tx_que->txr;
490 
491 	qidx_t processed = 0;
492 	qidx_t cur, prev, ntxd, rs_cidx;
493 	int32_t delta;
494 	bool is_done;
495 
496 	rs_cidx = txr->tx_rs_cidx;
497 #if 0
498 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) rs_cidx %d, txr->tx_rs_pidx %d\n", __func__,
499 	    txr->me, rs_cidx, txr->tx_rs_pidx);
500 #endif
501 	if (rs_cidx == txr->tx_rs_pidx)
502 		return (0);
503 	cur = txr->tx_rsq[rs_cidx];
504 	MPASS(cur != QIDX_INVALID);
505 	is_done = ixl_is_tx_desc_done(txr, cur);
506 
507 	if (!is_done)
508 		return (0);
509 
510 	/* If clear is false just let caller know that there
511 	 * are descriptors to reclaim */
512 	if (!clear)
513 		return (1);
514 
515 	prev = txr->tx_cidx_processed;
516 	ntxd = scctx->isc_ntxd[0];
517 	do {
518 		delta = (int32_t)cur - (int32_t)prev;
519 		/*
520 		 * XXX This appears to be a hack for first-packet.
521 		 * A correct fix would prevent prev == cur in the first place.
522 		 */
523 		MPASS(prev == 0 || delta != 0);
524 		if (prev == 0 && cur == 0)
525 			delta += 1;
526 		if (delta < 0)
527 			delta += ntxd;
528 #if 0
529 		device_printf(iflib_get_dev(vsi->ctx),
530 			      "%s: (q%d) cidx_processed=%u cur=%u clear=%d delta=%d\n",
531 			      __func__, txr->me, prev, cur, clear, delta);
532 #endif
533 		processed += delta;
534 		prev = cur;
535 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
536 		if (rs_cidx == txr->tx_rs_pidx)
537 			break;
538 		cur = txr->tx_rsq[rs_cidx];
539 		MPASS(cur != QIDX_INVALID);
540 		is_done = ixl_is_tx_desc_done(txr, cur);
541 	} while (is_done);
542 
543 	txr->tx_rs_cidx = rs_cidx;
544 	txr->tx_cidx_processed = prev;
545 
546 #if 0
547 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) processed %d\n", __func__, txr->me, processed);
548 #endif
549 	return (processed);
550 }
551 
552 static void
553 ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru)
554 {
555 	struct ixl_vsi *vsi = arg;
556 	if_softc_ctx_t scctx = vsi->shared;
557 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
558 	uint64_t *paddrs;
559 	uint32_t next_pidx, pidx;
560 	uint16_t count;
561 	int i;
562 
563 	paddrs = iru->iru_paddrs;
564 	pidx = iru->iru_pidx;
565 	count = iru->iru_count;
566 
567 	for (i = 0, next_pidx = pidx; i < count; i++) {
568 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
569 		if (++next_pidx == scctx->isc_nrxd[0])
570 			next_pidx = 0;
571  	}
572 }
573 
574 static void
575 ixl_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
576 {
577 	struct ixl_vsi		*vsi = arg;
578 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
579 
580 	wr32(vsi->hw, rxr->tail, pidx);
581 }
582 
/*
 * ixl_isc_rxd_available - count completed RX packets ready for harvest
 *
 * Scans the ring from @idx counting descriptors with EOF set, stopping
 * at the first descriptor without DD (hardware not done), once @budget
 * packets have been seen, or after nearly a full ring has been scanned.
 */
static int
ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct ixl_vsi *vsi = arg;
	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
	union i40e_rx_desc *rxd;
	u64 qword;
	uint32_t status;
	int cnt, i, nrxd;

	nrxd = vsi->shared->isc_nrxd[0];

	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		qword = le64toh(rxd->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
			>> I40E_RXD_QW1_STATUS_SHIFT;

		/* DD clear: hardware has not written this descriptor back. */
		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
			break;
		if (++i == nrxd)
			i = 0;
		/* Only whole packets count, i.e. descriptors with EOF. */
		if (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT))
			cnt++;
	}

	return (cnt);
}
611 
612 /*
613 ** i40e_ptype_to_hash: parse the packet type
614 ** to determine the appropriate hash.
615 */
616 static inline int
617 ixl_ptype_to_hash(u8 ptype)
618 {
619         struct i40e_rx_ptype_decoded	decoded;
620 
621 	decoded = decode_rx_desc_ptype(ptype);
622 
623 	if (!decoded.known)
624 		return M_HASHTYPE_OPAQUE;
625 
626 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
627 		return M_HASHTYPE_OPAQUE;
628 
629 	/* Note: anything that gets to this point is IP */
630         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
631 		switch (decoded.inner_prot) {
632 		case I40E_RX_PTYPE_INNER_PROT_TCP:
633 			return M_HASHTYPE_RSS_TCP_IPV6;
634 		case I40E_RX_PTYPE_INNER_PROT_UDP:
635 			return M_HASHTYPE_RSS_UDP_IPV6;
636 		default:
637 			return M_HASHTYPE_RSS_IPV6;
638 		}
639 	}
640         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
641 		switch (decoded.inner_prot) {
642 		case I40E_RX_PTYPE_INNER_PROT_TCP:
643 			return M_HASHTYPE_RSS_TCP_IPV4;
644 		case I40E_RX_PTYPE_INNER_PROT_UDP:
645 			return M_HASHTYPE_RSS_UDP_IPV4;
646 		default:
647 			return M_HASHTYPE_RSS_IPV4;
648 		}
649 	}
650 	/* We should never get here!! */
651 	return M_HASHTYPE_OPAQUE;
652 }
653 
/*********************************************************************
 *
 *  This routine executes in ithread context. It sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  Gathers all fragments of one packet (up to EOF) into ri->iri_frags,
 *  then fills in length, VLAN tag, RSS hash and checksum results.
 *
 *  Returns 0 upon success, errno on failure
 *
 *********************************************************************/
static int
ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct ixl_vsi		*vsi = arg;
	struct ixl_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
	struct rx_ring		*rxr = &que->rxr;
	union i40e_rx_desc	*cur;
	u32		status, error;
	u16		plen, vtag;
	u64		qword;
	u8		ptype;
	bool		eop;
	int i, cidx;

	cidx = ri->iri_cidx;
	i = 0;
	do {
		/* 5 descriptor receive limit */
		MPASS(i < IXL_MAX_RX_SEGS);

		cur = &rxr->rx_base[cidx];
		/* Unpack the write-back fields from qword 1. */
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		/* we should never be called without a valid descriptor */
		MPASS((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0);

		ri->iri_len += plen;
		rxr->rx_bytes += plen;

		/* Clear the descriptor so a stale DD is never re-read. */
		cur->wb.qword1.status_error_len = 0;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			return (EBADMSG);
		}
		/* Record this fragment for iflib. */
		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = plen;
		if (++cidx == vsi->shared->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* capture data for dynamic ITR adjustment */
	rxr->packets++;
	rxr->rx_packets++;

	/* status/error/ptype below come from the EOP descriptor. */
	if ((if_getcapenable(vsi->ifp) & IFCAP_RXCSUM) != 0)
		ixl_rx_checksum(ri, status, error, ptype);
	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
	ri->iri_rsstype = ixl_ptype_to_hash(ptype);
	ri->iri_vtag = vtag;
	ri->iri_nfrags = i;
	if (vtag)
		ri->iri_flags |= M_VLANTAG;
	return (0);
}
737 
/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *  Translates the descriptor's status/error bits into the iflib
 *  iri_csum_flags (L3/L4 calculated + valid) and checksum data.
 *
 *********************************************************************/
static void
ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	ri->iri_csum_flags = 0;

	/* No L3 or L4 checksum was calculated */
	if (!(status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	decoded = decode_rx_desc_ptype(ptype);

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			ri->iri_csum_flags = 0;
			return;
		}
	}

	ri->iri_csum_flags |= CSUM_L3_CALC;

	/* IPv4 checksum error */
	if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
		return;

	/* L3 good; L4 checksum was also calculated by hardware. */
	ri->iri_csum_flags |= CSUM_L3_VALID;
	ri->iri_csum_flags |= CSUM_L4_CALC;

	/* L4 checksum error */
	if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
		return;

	/* Both layers verified; report a hardware-validated checksum. */
	ri->iri_csum_flags |= CSUM_L4_VALID;
	ri->iri_csum_data |= htons(0xffff);
}
784 
785 /* Set Report Status queue fields to 0 */
786 void
787 ixl_init_tx_rsqs(struct ixl_vsi *vsi)
788 {
789 	if_softc_ctx_t scctx = vsi->shared;
790 	struct ixl_tx_queue *tx_que;
791 	int i, j;
792 
793 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
794 		struct tx_ring *txr = &tx_que->txr;
795 
796 		txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0;
797 
798 		for (j = 0; j < scctx->isc_ntxd[0]; j++)
799 			txr->tx_rsq[j] = QIDX_INVALID;
800 	}
801 }
802 
803 void
804 ixl_init_tx_cidx(struct ixl_vsi *vsi)
805 {
806 	struct ixl_tx_queue *tx_que;
807 	int i;
808 
809 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
810 		struct tx_ring *txr = &tx_que->txr;
811 
812 		txr->tx_cidx_processed = 0;
813 	}
814 }
815 
816 /*
817  * Input: bitmap of enum virtchnl_link_speed
818  */
819 u64
820 ixl_max_vc_speed_to_value(u8 link_speeds)
821 {
822 	if (link_speeds & VIRTCHNL_LINK_SPEED_40GB)
823 		return IF_Gbps(40);
824 	if (link_speeds & VIRTCHNL_LINK_SPEED_25GB)
825 		return IF_Gbps(25);
826 	if (link_speeds & VIRTCHNL_LINK_SPEED_20GB)
827 		return IF_Gbps(20);
828 	if (link_speeds & VIRTCHNL_LINK_SPEED_10GB)
829 		return IF_Gbps(10);
830 	if (link_speeds & VIRTCHNL_LINK_SPEED_1GB)
831 		return IF_Gbps(1);
832 	if (link_speeds & VIRTCHNL_LINK_SPEED_100MB)
833 		return IF_Mbps(100);
834 	else
835 		/* Minimum supported link speed */
836 		return IF_Mbps(100);
837 }
838 
839 void
840 ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi,
841     struct sysctl_ctx_list *ctx, const char *sysctl_name)
842 {
843 	struct sysctl_oid *tree;
844 	struct sysctl_oid_list *child;
845 	struct sysctl_oid_list *vsi_list;
846 
847 	tree = device_get_sysctl_tree(dev);
848 	child = SYSCTL_CHILDREN(tree);
849 	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
850 				   CTLFLAG_RD, NULL, "VSI Number");
851 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
852 
853 	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
854 }
855 
856 void
857 ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
858 	struct sysctl_oid_list *child,
859 	struct i40e_eth_stats *eth_stats)
860 {
861 	struct ixl_sysctl_info ctls[] =
862 	{
863 		{&eth_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
864 		{&eth_stats->rx_unicast, "ucast_pkts_rcvd",
865 			"Unicast Packets Received"},
866 		{&eth_stats->rx_multicast, "mcast_pkts_rcvd",
867 			"Multicast Packets Received"},
868 		{&eth_stats->rx_broadcast, "bcast_pkts_rcvd",
869 			"Broadcast Packets Received"},
870 		{&eth_stats->rx_discards, "rx_discards", "Discarded RX packets"},
871 		{&eth_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
872 		{&eth_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"},
873 		{&eth_stats->tx_multicast, "mcast_pkts_txd",
874 			"Multicast Packets Transmitted"},
875 		{&eth_stats->tx_broadcast, "bcast_pkts_txd",
876 			"Broadcast Packets Transmitted"},
877 		// end
878 		{0,0,0}
879 	};
880 
881 	struct ixl_sysctl_info *entry = ctls;
882 	while (entry->stat != 0)
883 	{
884 		SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name,
885 				CTLFLAG_RD, entry->stat,
886 				entry->description);
887 		entry++;
888 	}
889 }
890 
891 void
892 ixl_add_queues_sysctls(device_t dev, struct ixl_vsi *vsi)
893 {
894 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
895 	struct sysctl_oid_list *vsi_list, *queue_list;
896 	struct sysctl_oid *queue_node;
897 	char queue_namebuf[32];
898 
899 	struct ixl_rx_queue *rx_que;
900 	struct ixl_tx_queue *tx_que;
901 	struct tx_ring *txr;
902 	struct rx_ring *rxr;
903 
904 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
905 
906 	/* Queue statistics */
907 	for (int q = 0; q < vsi->num_rx_queues; q++) {
908 		bzero(queue_namebuf, sizeof(queue_namebuf));
909 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "rxq%02d", q);
910 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
911 		    OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "RX Queue #");
912 		queue_list = SYSCTL_CHILDREN(queue_node);
913 
914 		rx_que = &(vsi->rx_queues[q]);
915 		rxr = &(rx_que->rxr);
916 
917 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
918 				CTLFLAG_RD, &(rx_que->irqs),
919 				"irqs on this queue (both Tx and Rx)");
920 
921 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
922 				CTLFLAG_RD, &(rxr->rx_packets),
923 				"Queue Packets Received");
924 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
925 				CTLFLAG_RD, &(rxr->rx_bytes),
926 				"Queue Bytes Received");
927 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "desc_err",
928 				CTLFLAG_RD, &(rxr->desc_errs),
929 				"Queue Rx Descriptor Errors");
930 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
931 				CTLFLAG_RD, &(rxr->itr), 0,
932 				"Queue Rx ITR Interval");
933 	}
934 	for (int q = 0; q < vsi->num_tx_queues; q++) {
935 		bzero(queue_namebuf, sizeof(queue_namebuf));
936 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "txq%02d", q);
937 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
938 		    OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "TX Queue #");
939 		queue_list = SYSCTL_CHILDREN(queue_node);
940 
941 		tx_que = &(vsi->tx_queues[q]);
942 		txr = &(tx_que->txr);
943 
944 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
945 				CTLFLAG_RD, &(tx_que->tso),
946 				"TSO");
947 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
948 				CTLFLAG_RD, &(txr->mss_too_small),
949 				"TSO sends with an MSS less than 64");
950 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
951 				CTLFLAG_RD, &(txr->tx_packets),
952 				"Queue Packets Transmitted");
953 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
954 				CTLFLAG_RD, &(txr->tx_bytes),
955 				"Queue Bytes Transmitted");
956 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
957 				CTLFLAG_RD, &(txr->itr), 0,
958 				"Queue Tx ITR Interval");
959 	}
960 }
961