xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision b0d29bc47dba79f6f38e67eabadfb4b32ffd9390)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2018, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 **	    This was separated to allow usage by
38 ** 	    both the PF and VF drivers.
39 */
40 
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46 
47 #include "ixl.h"
48 
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
/* Local Prototypes */

/* RX helper: translates descriptor status/error bits into checksum flags. */
static void	ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);

/*
 * TX-side callbacks placed in the if_txrx tables below.  The two
 * credits_update variants differ only in how completed descriptors are
 * discovered (_hwb reads the head index written back by hardware, _dwb
 * inspects the descriptors recorded in the report-status queue).
 */
static int	ixl_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void	ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int	ixl_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
static int	ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);

/* RX-side callbacks placed in the if_txrx tables below. */
static void	ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void	ixl_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
				  qidx_t pidx);
static int	ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
				      qidx_t budget);
static int	ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
67 
68 struct if_txrx ixl_txrx_hwb = {
69 	ixl_isc_txd_encap,
70 	ixl_isc_txd_flush,
71 	ixl_isc_txd_credits_update_hwb,
72 	ixl_isc_rxd_available,
73 	ixl_isc_rxd_pkt_get,
74 	ixl_isc_rxd_refill,
75 	ixl_isc_rxd_flush,
76 	NULL
77 };
78 
79 struct if_txrx ixl_txrx_dwb = {
80 	ixl_isc_txd_encap,
81 	ixl_isc_txd_flush,
82 	ixl_isc_txd_credits_update_dwb,
83 	ixl_isc_rxd_available,
84 	ixl_isc_rxd_pkt_get,
85 	ixl_isc_rxd_refill,
86 	ixl_isc_rxd_flush,
87 	NULL
88 };
89 
90 /*
91  * @key key is saved into this parameter
92  */
93 void
94 ixl_get_default_rss_key(u32 *key)
95 {
96 	MPASS(key != NULL);
97 
98 	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
99 	    0x183cfd8c, 0xce880440, 0x580cbc3c,
100 	    0x35897377, 0x328b25e1, 0x4fa98922,
101 	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
102 	    0x0, 0x0, 0x0};
103 
104 	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
105 }
106 
107 /**
108  * i40e_vc_stat_str - convert virtchnl status err code to a string
109  * @hw: pointer to the HW structure
110  * @stat_err: the status error code to convert
111  **/
112 const char *
113 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
114 {
115 	switch (stat_err) {
116 	case VIRTCHNL_STATUS_SUCCESS:
117 		return "OK";
118 	case VIRTCHNL_ERR_PARAM:
119 		return "VIRTCHNL_ERR_PARAM";
120 	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
121 		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
122 	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
123 		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
124 	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
125 		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
126 	case VIRTCHNL_STATUS_NOT_SUPPORTED:
127 		return "VIRTCHNL_STATUS_NOT_SUPPORTED";
128 	}
129 
130 	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
131 	return hw->err_str;
132 }
133 
134 void
135 ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...)
136 {
137 	va_list args;
138 
139 	if (!(mask & enabled_mask))
140 		return;
141 
142 	/* Re-implement device_printf() */
143 	device_print_prettyname(dev);
144 	va_start(args, fmt);
145 	vprintf(fmt, args);
146 	va_end(args);
147 }
148 
149 static bool
150 ixl_is_tx_desc_done(struct tx_ring *txr, int idx)
151 {
152 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> I40E_TXD_QW1_DTYPE_SHIFT)
153 	    & I40E_TXD_QW1_DTYPE_MASK) == I40E_TX_DESC_DTYPE_DESC_DONE);
154 }
155 
156 static int
157 ixl_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
158 {
159 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
160 
161 	if (nsegs <= IXL_MAX_TX_SEGS-2)
162 		return (0);
163 	segsz = pi->ipi_tso_segsz;
164 	curseg = count = 0;
165 
166 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
167 	tsolen = pi->ipi_len - hlen;
168 
169 	i = 0;
170 	curseg = segs[0].ds_len;
171 	while (hlen > 0) {
172 		count++;
173 		if (count > IXL_MAX_TX_SEGS - 2)
174 			return (1);
175 		if (curseg == 0) {
176 			i++;
177 			if (__predict_false(i == nsegs))
178 				return (1);
179 
180 			curseg = segs[i].ds_len;
181 		}
182 		seglen = min(curseg, hlen);
183 		curseg -= seglen;
184 		hlen -= seglen;
185 		// printf("H:seglen = %d, count=%d\n", seglen, count);
186 	}
187 	while (tsolen > 0) {
188 		segsz = pi->ipi_tso_segsz;
189 		while (segsz > 0 && tsolen != 0) {
190 			count++;
191 			if (count > IXL_MAX_TX_SEGS - 2) {
192 				// printf("bad: count = %d\n", count);
193 				return (1);
194 			}
195 			if (curseg == 0) {
196 				i++;
197 				if (__predict_false(i == nsegs)) {
198 					// printf("bad: tsolen = %d", tsolen);
199 					return (1);
200 				}
201 				curseg = segs[i].ds_len;
202 			}
203 			seglen = min(curseg, segsz);
204 			segsz -= seglen;
205 			curseg -= seglen;
206 			tsolen -= seglen;
207 			// printf("D:seglen = %d, count=%d\n", seglen, count);
208 		}
209 		count = 0;
210 	}
211 
212  	return (0);
213 }
214 
215 /*********************************************************************
216  *
217  *  Setup descriptor for hw offloads
218  *
219  **********************************************************************/
220 
221 static void
222 ixl_tx_setup_offload(struct ixl_tx_queue *que,
223     if_pkt_info_t pi, u32 *cmd, u32 *off)
224 {
225 	switch (pi->ipi_etype) {
226 #ifdef INET
227 		case ETHERTYPE_IP:
228 			if (pi->ipi_csum_flags & IXL_CSUM_IPV4)
229 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
230 			else
231 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
232 			break;
233 #endif
234 #ifdef INET6
235 		case ETHERTYPE_IPV6:
236 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
237 			break;
238 #endif
239 		default:
240 			break;
241 	}
242 
243 	*off |= (pi->ipi_ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
244 	*off |= (pi->ipi_ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
245 
246 	switch (pi->ipi_ipproto) {
247 		case IPPROTO_TCP:
248 			if (pi->ipi_csum_flags & IXL_CSUM_TCP) {
249 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
250 				*off |= (pi->ipi_tcp_hlen >> 2) <<
251 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
252 				/* Check for NO_HEAD MDD event */
253 				MPASS(pi->ipi_tcp_hlen != 0);
254 			}
255 			break;
256 		case IPPROTO_UDP:
257 			if (pi->ipi_csum_flags & IXL_CSUM_UDP) {
258 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
259 				*off |= (sizeof(struct udphdr) >> 2) <<
260 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
261 			}
262 			break;
263 		case IPPROTO_SCTP:
264 			if (pi->ipi_csum_flags & IXL_CSUM_SCTP) {
265 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
266 				*off |= (sizeof(struct sctphdr) >> 2) <<
267 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
268 			}
269 			/* Fall Thru */
270 		default:
271 			break;
272 	}
273 }
274 
275 /**********************************************************************
276  *
277  *  Setup context for hardware segmentation offload (TSO)
278  *
279  **********************************************************************/
280 static int
281 ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
282 {
283 	if_softc_ctx_t			scctx;
284 	struct i40e_tx_context_desc	*TXD;
285 	u32				cmd, mss, type, tsolen;
286 	int				idx, total_hdr_len;
287 	u64				type_cmd_tso_mss;
288 
289 	idx = pi->ipi_pidx;
290 	TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
291 	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
292 	tsolen = pi->ipi_len - total_hdr_len;
293 	scctx = txr->que->vsi->shared;
294 
295 	type = I40E_TX_DESC_DTYPE_CONTEXT;
296 	cmd = I40E_TX_CTX_DESC_TSO;
297 	/*
298 	 * TSO MSS must not be less than 64; this prevents a
299 	 * BAD_LSO_MSS MDD event when the MSS is too small.
300 	 */
301 	if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
302 		txr->mss_too_small++;
303 		pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
304 	}
305 	mss = pi->ipi_tso_segsz;
306 
307 	/* Check for BAD_LS0_MSS MDD event (mss too large) */
308 	MPASS(mss <= IXL_MAX_TSO_MSS);
309 	/* Check for NO_HEAD MDD event (header lengths are 0) */
310 	MPASS(pi->ipi_ehdrlen != 0);
311 	MPASS(pi->ipi_ip_hlen != 0);
312 	/* Partial check for BAD_LSO_LEN MDD event */
313 	MPASS(tsolen != 0);
314 	/* Partial check for WRONG_SIZE MDD event (during TSO) */
315 	MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
316 
317 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
318 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
319 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
320 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
321 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
322 
323 	TXD->tunneling_params = htole32(0);
324 	txr->que->tso++;
325 
326 	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
327 }
328 
329 /*********************************************************************
330   *
331  *  This routine maps the mbufs to tx descriptors, allowing the
332  *  TX engine to transmit the packets.
333  *  	- return 0 on success, positive on failure
334   *
335   **********************************************************************/
336 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
337 
338 static int
339 ixl_isc_txd_encap(void *arg, if_pkt_info_t pi)
340 {
341 	struct ixl_vsi		*vsi = arg;
342 	if_softc_ctx_t		scctx = vsi->shared;
343 	struct ixl_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
344  	struct tx_ring		*txr = &que->txr;
345 	int			nsegs = pi->ipi_nsegs;
346 	bus_dma_segment_t *segs = pi->ipi_segs;
347 	struct i40e_tx_desc	*txd = NULL;
348 	int             	i, j, mask, pidx_last;
349 	u32			cmd, off, tx_intr;
350 
351 	cmd = off = 0;
352 	i = pi->ipi_pidx;
353 
354 	tx_intr = (pi->ipi_flags & IPI_TX_INTR);
355 
356 	/* Set up the TSO/CSUM offload */
357 	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
358 		/* Set up the TSO context descriptor if required */
359 		if (pi->ipi_csum_flags & CSUM_TSO) {
360 			/* Prevent MAX_BUFF MDD event (for TSO) */
361 			if (ixl_tso_detect_sparse(segs, nsegs, pi))
362 				return (EFBIG);
363 			i = ixl_tso_setup(txr, pi);
364 		}
365 		ixl_tx_setup_offload(que, pi, &cmd, &off);
366 	}
367 	if (pi->ipi_mflags & M_VLANTAG)
368 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
369 
370 	cmd |= I40E_TX_DESC_CMD_ICRC;
371 	mask = scctx->isc_ntxd[0] - 1;
372 	/* Check for WRONG_SIZE MDD event */
373 	MPASS(pi->ipi_len >= IXL_MIN_FRAME);
374 #ifdef INVARIANTS
375 	if (!(pi->ipi_csum_flags & CSUM_TSO))
376 		MPASS(pi->ipi_len <= IXL_MAX_FRAME);
377 #endif
378 	for (j = 0; j < nsegs; j++) {
379 		bus_size_t seglen;
380 
381 		txd = &txr->tx_base[i];
382 		seglen = segs[j].ds_len;
383 
384 		/* Check for ZERO_BSIZE MDD event */
385 		MPASS(seglen != 0);
386 
387 		txd->buffer_addr = htole64(segs[j].ds_addr);
388 		txd->cmd_type_offset_bsz =
389 		    htole64(I40E_TX_DESC_DTYPE_DATA
390 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
391 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
392 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
393 	            | ((u64)htole16(pi->ipi_vtag) << I40E_TXD_QW1_L2TAG1_SHIFT));
394 
395 		txr->tx_bytes += seglen;
396 		pidx_last = i;
397 		i = (i+1) & mask;
398 	}
399 	/* Set the last descriptor for report */
400 	txd->cmd_type_offset_bsz |=
401 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
402 	/* Add to report status array (if using TX interrupts) */
403 	if (!vsi->enable_head_writeback && tx_intr) {
404 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
405 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
406 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
407  	}
408 	pi->ipi_new_pidx = i;
409 
410 	++txr->tx_packets;
411 	return (0);
412 }
413 
414 static void
415 ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
416 {
417 	struct ixl_vsi *vsi = arg;
418 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
419 
420  	/*
421 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
422 	 * hardware that this frame is available to transmit.
423  	 */
424 	/* Check for ENDLESS_TX MDD event */
425 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
426 	wr32(vsi->hw, txr->tail, pidx);
427 }
428 
429 
430 /*********************************************************************
431  *
432  *  (Re)Initialize a queue transmit ring by clearing its memory.
433  *
434  **********************************************************************/
435 void
436 ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que)
437 {
438 	struct tx_ring *txr = &que->txr;
439 
440 	/* Clear the old ring contents */
441 	bzero((void *)txr->tx_base,
442 	      (sizeof(struct i40e_tx_desc)) *
443 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
444 
445 	wr32(vsi->hw, txr->tail, 0);
446 }
447 
448 /*
449  * ixl_get_tx_head - Retrieve the value from the
450  *    location the HW records its HEAD index
451  */
452 static inline u32
453 ixl_get_tx_head(struct ixl_tx_queue *que)
454 {
455 	if_softc_ctx_t          scctx = que->vsi->shared;
456 	struct tx_ring  *txr = &que->txr;
457 	void *head = &txr->tx_base[scctx->isc_ntxd[0]];
458 
459 	return LE32_TO_CPU(*(volatile __le32 *)head);
460 }
461 
462 static int
463 ixl_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
464 {
465 	struct ixl_vsi          *vsi = arg;
466 	if_softc_ctx_t          scctx = vsi->shared;
467 	struct ixl_tx_queue     *que = &vsi->tx_queues[qid];
468 	struct tx_ring		*txr = &que->txr;
469 	int			 head, credits;
470 
471 	/* Get the Head WB value */
472 	head = ixl_get_tx_head(que);
473 
474 	credits = head - txr->tx_cidx_processed;
475 	if (credits < 0)
476 		credits += scctx->isc_ntxd[0];
477 	if (clear)
478 		txr->tx_cidx_processed = head;
479 
480 	return (credits);
481 }
482 
483 static int
484 ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
485 {
486 	struct ixl_vsi *vsi = arg;
487 	struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid];
488 	if_softc_ctx_t scctx = vsi->shared;
489 	struct tx_ring *txr = &tx_que->txr;
490 
491 	qidx_t processed = 0;
492 	qidx_t cur, prev, ntxd, rs_cidx;
493 	int32_t delta;
494 	bool is_done;
495 
496 	rs_cidx = txr->tx_rs_cidx;
497 #if 0
498 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) rs_cidx %d, txr->tx_rs_pidx %d\n", __func__,
499 	    txr->me, rs_cidx, txr->tx_rs_pidx);
500 #endif
501 	if (rs_cidx == txr->tx_rs_pidx)
502 		return (0);
503 	cur = txr->tx_rsq[rs_cidx];
504 	MPASS(cur != QIDX_INVALID);
505 	is_done = ixl_is_tx_desc_done(txr, cur);
506 
507 	if (!is_done)
508 		return (0);
509 
510 	/* If clear is false just let caller know that there
511 	 * are descriptors to reclaim */
512 	if (!clear)
513 		return (1);
514 
515 	prev = txr->tx_cidx_processed;
516 	ntxd = scctx->isc_ntxd[0];
517 	do {
518 		MPASS(prev != cur);
519 		delta = (int32_t)cur - (int32_t)prev;
520 		if (delta < 0)
521 			delta += ntxd;
522 		MPASS(delta > 0);
523 #if 0
524 		device_printf(iflib_get_dev(vsi->ctx),
525 			      "%s: (q%d) cidx_processed=%u cur=%u clear=%d delta=%d\n",
526 			      __func__, txr->me, prev, cur, clear, delta);
527 #endif
528 		processed += delta;
529 		prev = cur;
530 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
531 		if (rs_cidx == txr->tx_rs_pidx)
532 			break;
533 		cur = txr->tx_rsq[rs_cidx];
534 		MPASS(cur != QIDX_INVALID);
535 		is_done = ixl_is_tx_desc_done(txr, cur);
536 	} while (is_done);
537 
538 	txr->tx_rs_cidx = rs_cidx;
539 	txr->tx_cidx_processed = prev;
540 
541 #if 0
542 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) processed %d\n", __func__, txr->me, processed);
543 #endif
544 	return (processed);
545 }
546 
547 static void
548 ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru)
549 {
550 	struct ixl_vsi *vsi = arg;
551 	if_softc_ctx_t scctx = vsi->shared;
552 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
553 	uint64_t *paddrs;
554 	uint32_t next_pidx, pidx;
555 	uint16_t count;
556 	int i;
557 
558 	paddrs = iru->iru_paddrs;
559 	pidx = iru->iru_pidx;
560 	count = iru->iru_count;
561 
562 	for (i = 0, next_pidx = pidx; i < count; i++) {
563 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
564 		if (++next_pidx == scctx->isc_nrxd[0])
565 			next_pidx = 0;
566  	}
567 }
568 
569 static void
570 ixl_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
571 {
572 	struct ixl_vsi		*vsi = arg;
573 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
574 
575 	wr32(vsi->hw, rxr->tail, pidx);
576 }
577 
578 static int
579 ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
580 {
581 	struct ixl_vsi *vsi = arg;
582 	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
583 	union i40e_rx_desc *rxd;
584 	u64 qword;
585 	uint32_t status;
586 	int cnt, i, nrxd;
587 
588 	nrxd = vsi->shared->isc_nrxd[0];
589 
590 	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
591 		rxd = &rxr->rx_base[i];
592 		qword = le64toh(rxd->wb.qword1.status_error_len);
593 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
594 			>> I40E_RXD_QW1_STATUS_SHIFT;
595 
596 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
597 			break;
598 		if (++i == nrxd)
599 			i = 0;
600 		if (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT))
601 			cnt++;
602 	}
603 
604 	return (cnt);
605 }
606 
607 /*
608 ** i40e_ptype_to_hash: parse the packet type
609 ** to determine the appropriate hash.
610 */
611 static inline int
612 ixl_ptype_to_hash(u8 ptype)
613 {
614         struct i40e_rx_ptype_decoded	decoded;
615 
616 	decoded = decode_rx_desc_ptype(ptype);
617 
618 	if (!decoded.known)
619 		return M_HASHTYPE_OPAQUE;
620 
621 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
622 		return M_HASHTYPE_OPAQUE;
623 
624 	/* Note: anything that gets to this point is IP */
625         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
626 		switch (decoded.inner_prot) {
627 		case I40E_RX_PTYPE_INNER_PROT_TCP:
628 			return M_HASHTYPE_RSS_TCP_IPV6;
629 		case I40E_RX_PTYPE_INNER_PROT_UDP:
630 			return M_HASHTYPE_RSS_UDP_IPV6;
631 		default:
632 			return M_HASHTYPE_RSS_IPV6;
633 		}
634 	}
635         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
636 		switch (decoded.inner_prot) {
637 		case I40E_RX_PTYPE_INNER_PROT_TCP:
638 			return M_HASHTYPE_RSS_TCP_IPV4;
639 		case I40E_RX_PTYPE_INNER_PROT_UDP:
640 			return M_HASHTYPE_RSS_UDP_IPV4;
641 		default:
642 			return M_HASHTYPE_RSS_IPV4;
643 		}
644 	}
645 	/* We should never get here!! */
646 	return M_HASHTYPE_OPAQUE;
647 }
648 
649 /*********************************************************************
650  *
651  *  This routine executes in ithread context. It sends data which has been
652  *  dma'ed into host memory to upper layer.
653  *
654  *  Returns 0 upon success, errno on failure
655  *
656  *********************************************************************/
657 static int
658 ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
659 {
660 	struct ixl_vsi		*vsi = arg;
661 	struct ixl_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
662 	struct rx_ring		*rxr = &que->rxr;
663 	union i40e_rx_desc	*cur;
664 	u32		status, error;
665 	u16		plen, vtag;
666 	u64		qword;
667 	u8		ptype;
668 	bool		eop;
669 	int i, cidx;
670 
671 	cidx = ri->iri_cidx;
672 	i = 0;
673 	do {
674 		/* 5 descriptor receive limit */
675 		MPASS(i < IXL_MAX_RX_SEGS);
676 
677 		cur = &rxr->rx_base[cidx];
678 		qword = le64toh(cur->wb.qword1.status_error_len);
679 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
680 		    >> I40E_RXD_QW1_STATUS_SHIFT;
681 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
682 		    >> I40E_RXD_QW1_ERROR_SHIFT;
683 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
684 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
685 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
686 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
687 
688 		/* we should never be called without a valid descriptor */
689 		MPASS((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0);
690 
691 		ri->iri_len += plen;
692 		rxr->rx_bytes += plen;
693 
694 		cur->wb.qword1.status_error_len = 0;
695 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
696 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
697 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
698 		else
699 			vtag = 0;
700 
701 		/*
702 		** Make sure bad packets are discarded,
703 		** note that only EOP descriptor has valid
704 		** error results.
705 		*/
706 		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
707 			rxr->desc_errs++;
708 			return (EBADMSG);
709 		}
710 		ri->iri_frags[i].irf_flid = 0;
711 		ri->iri_frags[i].irf_idx = cidx;
712 		ri->iri_frags[i].irf_len = plen;
713 		if (++cidx == vsi->shared->isc_nrxd[0])
714 			cidx = 0;
715 		i++;
716 	} while (!eop);
717 
718 	/* capture data for dynamic ITR adjustment */
719 	rxr->packets++;
720 	rxr->rx_packets++;
721 
722 	if ((if_getcapenable(vsi->ifp) & IFCAP_RXCSUM) != 0)
723 		ixl_rx_checksum(ri, status, error, ptype);
724 	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
725 	ri->iri_rsstype = ixl_ptype_to_hash(ptype);
726 	ri->iri_vtag = vtag;
727 	ri->iri_nfrags = i;
728 	if (vtag)
729 		ri->iri_flags |= M_VLANTAG;
730 	return (0);
731 }
732 
733 /*********************************************************************
734  *
735  *  Verify that the hardware indicated that the checksum is valid.
736  *  Inform the stack about the status of checksum so that stack
737  *  doesn't spend time verifying the checksum.
738  *
739  *********************************************************************/
740 static void
741 ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
742 {
743 	struct i40e_rx_ptype_decoded decoded;
744 
745 	ri->iri_csum_flags = 0;
746 
747 	/* No L3 or L4 checksum was calculated */
748 	if (!(status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
749 		return;
750 
751 	decoded = decode_rx_desc_ptype(ptype);
752 
753 	/* IPv6 with extension headers likely have bad csum */
754 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
755 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
756 		if (status &
757 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
758 			ri->iri_csum_flags = 0;
759 			return;
760 		}
761 	}
762 
763 	ri->iri_csum_flags |= CSUM_L3_CALC;
764 
765 	/* IPv4 checksum error */
766 	if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
767 		return;
768 
769 	ri->iri_csum_flags |= CSUM_L3_VALID;
770 	ri->iri_csum_flags |= CSUM_L4_CALC;
771 
772 	/* L4 checksum error */
773 	if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
774 		return;
775 
776 	ri->iri_csum_flags |= CSUM_L4_VALID;
777 	ri->iri_csum_data |= htons(0xffff);
778 }
779 
780 /* Set Report Status queue fields to 0 */
781 void
782 ixl_init_tx_rsqs(struct ixl_vsi *vsi)
783 {
784 	if_softc_ctx_t scctx = vsi->shared;
785 	struct ixl_tx_queue *tx_que;
786 	int i, j;
787 
788 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
789 		struct tx_ring *txr = &tx_que->txr;
790 
791 		txr->tx_rs_cidx = txr->tx_rs_pidx;
792 
793 		/* Initialize the last processed descriptor to be the end of
794 		 * the ring, rather than the start, so that we avoid an
795 		 * off-by-one error when calculating how many descriptors are
796 		 * done in the credits_update function.
797 		 */
798 		txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1;
799 
800 		for (j = 0; j < scctx->isc_ntxd[0]; j++)
801 			txr->tx_rsq[j] = QIDX_INVALID;
802 	}
803 }
804 
805 void
806 ixl_init_tx_cidx(struct ixl_vsi *vsi)
807 {
808 	if_softc_ctx_t scctx = vsi->shared;
809 	struct ixl_tx_queue *tx_que;
810 	int i;
811 
812 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
813 		struct tx_ring *txr = &tx_que->txr;
814 
815 		txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1;
816 	}
817 }
818 
819 /*
820  * Input: bitmap of enum virtchnl_link_speed
821  */
822 u64
823 ixl_max_vc_speed_to_value(u8 link_speeds)
824 {
825 	if (link_speeds & VIRTCHNL_LINK_SPEED_40GB)
826 		return IF_Gbps(40);
827 	if (link_speeds & VIRTCHNL_LINK_SPEED_25GB)
828 		return IF_Gbps(25);
829 	if (link_speeds & VIRTCHNL_LINK_SPEED_20GB)
830 		return IF_Gbps(20);
831 	if (link_speeds & VIRTCHNL_LINK_SPEED_10GB)
832 		return IF_Gbps(10);
833 	if (link_speeds & VIRTCHNL_LINK_SPEED_1GB)
834 		return IF_Gbps(1);
835 	if (link_speeds & VIRTCHNL_LINK_SPEED_100MB)
836 		return IF_Mbps(100);
837 	else
838 		/* Minimum supported link speed */
839 		return IF_Mbps(100);
840 }
841 
842 void
843 ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi,
844     struct sysctl_ctx_list *ctx, const char *sysctl_name)
845 {
846 	struct sysctl_oid *tree;
847 	struct sysctl_oid_list *child;
848 	struct sysctl_oid_list *vsi_list;
849 
850 	tree = device_get_sysctl_tree(dev);
851 	child = SYSCTL_CHILDREN(tree);
852 	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
853 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "VSI Number");
854 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
855 
856 	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
857 }
858 
859 void
860 ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
861 	struct sysctl_oid_list *child,
862 	struct i40e_eth_stats *eth_stats)
863 {
864 	struct ixl_sysctl_info ctls[] =
865 	{
866 		{&eth_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
867 		{&eth_stats->rx_unicast, "ucast_pkts_rcvd",
868 			"Unicast Packets Received"},
869 		{&eth_stats->rx_multicast, "mcast_pkts_rcvd",
870 			"Multicast Packets Received"},
871 		{&eth_stats->rx_broadcast, "bcast_pkts_rcvd",
872 			"Broadcast Packets Received"},
873 		{&eth_stats->rx_discards, "rx_discards", "Discarded RX packets"},
874 		{&eth_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
875 		{&eth_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"},
876 		{&eth_stats->tx_multicast, "mcast_pkts_txd",
877 			"Multicast Packets Transmitted"},
878 		{&eth_stats->tx_broadcast, "bcast_pkts_txd",
879 			"Broadcast Packets Transmitted"},
880 		// end
881 		{0,0,0}
882 	};
883 
884 	struct ixl_sysctl_info *entry = ctls;
885 	while (entry->stat != 0)
886 	{
887 		SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name,
888 				CTLFLAG_RD, entry->stat,
889 				entry->description);
890 		entry++;
891 	}
892 }
893 
894 void
895 ixl_add_queues_sysctls(device_t dev, struct ixl_vsi *vsi)
896 {
897 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
898 	struct sysctl_oid_list *vsi_list, *queue_list;
899 	struct sysctl_oid *queue_node;
900 	char queue_namebuf[32];
901 
902 	struct ixl_rx_queue *rx_que;
903 	struct ixl_tx_queue *tx_que;
904 	struct tx_ring *txr;
905 	struct rx_ring *rxr;
906 
907 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
908 
909 	/* Queue statistics */
910 	for (int q = 0; q < vsi->num_rx_queues; q++) {
911 		bzero(queue_namebuf, sizeof(queue_namebuf));
912 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "rxq%02d", q);
913 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
914 		    OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE,
915 		    NULL, "RX Queue #");
916 		queue_list = SYSCTL_CHILDREN(queue_node);
917 
918 		rx_que = &(vsi->rx_queues[q]);
919 		rxr = &(rx_que->rxr);
920 
921 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
922 				CTLFLAG_RD, &(rx_que->irqs),
923 				"irqs on this queue (both Tx and Rx)");
924 
925 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
926 				CTLFLAG_RD, &(rxr->rx_packets),
927 				"Queue Packets Received");
928 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
929 				CTLFLAG_RD, &(rxr->rx_bytes),
930 				"Queue Bytes Received");
931 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "desc_err",
932 				CTLFLAG_RD, &(rxr->desc_errs),
933 				"Queue Rx Descriptor Errors");
934 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
935 				CTLFLAG_RD, &(rxr->itr), 0,
936 				"Queue Rx ITR Interval");
937 	}
938 	for (int q = 0; q < vsi->num_tx_queues; q++) {
939 		bzero(queue_namebuf, sizeof(queue_namebuf));
940 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "txq%02d", q);
941 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
942 		    OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE,
943 		    NULL, "TX Queue #");
944 		queue_list = SYSCTL_CHILDREN(queue_node);
945 
946 		tx_que = &(vsi->tx_queues[q]);
947 		txr = &(tx_que->txr);
948 
949 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
950 				CTLFLAG_RD, &(tx_que->tso),
951 				"TSO");
952 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
953 				CTLFLAG_RD, &(txr->mss_too_small),
954 				"TSO sends with an MSS less than 64");
955 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
956 				CTLFLAG_RD, &(txr->tx_packets),
957 				"Queue Packets Transmitted");
958 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
959 				CTLFLAG_RD, &(txr->tx_bytes),
960 				"Queue Bytes Transmitted");
961 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
962 				CTLFLAG_RD, &(txr->itr), 0,
963 				"Queue Tx ITR Interval");
964 	}
965 }
966