xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2018, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 
/*
**	IXL driver TX/RX Routines:
**	    This was separated to allow usage by
**	    both the PF and VF drivers.
*/
39 
40 #ifndef IXL_STANDALONE_BUILD
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "opt_rss.h"
44 #endif
45 
46 #include "ixl.h"
47 
48 #ifdef RSS
49 #include <net/rss_config.h>
50 #endif
51 
52 /* Local Prototypes */
53 static u8	ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);
54 
55 static int	ixl_isc_txd_encap(void *arg, if_pkt_info_t pi);
56 static void	ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
57 static int	ixl_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
58 static int	ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);
59 
60 static void	ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru);
61 static void	ixl_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
62 				  qidx_t pidx);
63 static int	ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
64 				      qidx_t budget);
65 static int	ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
66 
67 struct if_txrx ixl_txrx_hwb = {
68 	ixl_isc_txd_encap,
69 	ixl_isc_txd_flush,
70 	ixl_isc_txd_credits_update_hwb,
71 	ixl_isc_rxd_available,
72 	ixl_isc_rxd_pkt_get,
73 	ixl_isc_rxd_refill,
74 	ixl_isc_rxd_flush,
75 	NULL
76 };
77 
78 struct if_txrx ixl_txrx_dwb = {
79 	ixl_isc_txd_encap,
80 	ixl_isc_txd_flush,
81 	ixl_isc_txd_credits_update_dwb,
82 	ixl_isc_rxd_available,
83 	ixl_isc_rxd_pkt_get,
84 	ixl_isc_rxd_refill,
85 	ixl_isc_rxd_flush,
86 	NULL
87 };
88 
89 /*
90  * @key key is saved into this parameter
91  */
92 void
93 ixl_get_default_rss_key(u32 *key)
94 {
95 	MPASS(key != NULL);
96 
97 	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
98 	    0x183cfd8c, 0xce880440, 0x580cbc3c,
99 	    0x35897377, 0x328b25e1, 0x4fa98922,
100 	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
101 	    0x0, 0x0, 0x0};
102 
103 	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
104 }
105 
106 /**
107  * i40e_vc_stat_str - convert virtchnl status err code to a string
108  * @hw: pointer to the HW structure
109  * @stat_err: the status error code to convert
110  **/
111 const char *
112 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
113 {
114 	switch (stat_err) {
115 	case VIRTCHNL_STATUS_SUCCESS:
116 		return "OK";
117 	case VIRTCHNL_ERR_PARAM:
118 		return "VIRTCHNL_ERR_PARAM";
119 	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
120 		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
121 	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
122 		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
123 	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
124 		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
125 	case VIRTCHNL_STATUS_NOT_SUPPORTED:
126 		return "VIRTCHNL_STATUS_NOT_SUPPORTED";
127 	}
128 
129 	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
130 	return hw->err_str;
131 }
132 
133 void
134 ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...)
135 {
136 	va_list args;
137 
138 	if (!(mask & enabled_mask))
139 		return;
140 
141 	/* Re-implement device_printf() */
142 	device_print_prettyname(dev);
143 	va_start(args, fmt);
144 	vprintf(fmt, args);
145 	va_end(args);
146 }
147 
148 static bool
149 ixl_is_tx_desc_done(struct tx_ring *txr, int idx)
150 {
151 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> I40E_TXD_QW1_DTYPE_SHIFT)
152 	    & I40E_TXD_QW1_DTYPE_MASK) == I40E_TX_DESC_DTYPE_DESC_DONE);
153 }
154 
155 static int
156 ixl_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
157 {
158 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
159 
160 	if (nsegs <= IXL_MAX_TX_SEGS-2)
161 		return (0);
162 	segsz = pi->ipi_tso_segsz;
163 	curseg = count = 0;
164 
165 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
166 	tsolen = pi->ipi_len - hlen;
167 
168 	i = 0;
169 	curseg = segs[0].ds_len;
170 	while (hlen > 0) {
171 		count++;
172 		if (count > IXL_MAX_TX_SEGS - 2)
173 			return (1);
174 		if (curseg == 0) {
175 			i++;
176 			if (__predict_false(i == nsegs))
177 				return (1);
178 
179 			curseg = segs[i].ds_len;
180 		}
181 		seglen = min(curseg, hlen);
182 		curseg -= seglen;
183 		hlen -= seglen;
184 		// printf("H:seglen = %d, count=%d\n", seglen, count);
185 	}
186 	while (tsolen > 0) {
187 		segsz = pi->ipi_tso_segsz;
188 		while (segsz > 0 && tsolen != 0) {
189 			count++;
190 			if (count > IXL_MAX_TX_SEGS - 2) {
191 				// printf("bad: count = %d\n", count);
192 				return (1);
193 			}
194 			if (curseg == 0) {
195 				i++;
196 				if (__predict_false(i == nsegs)) {
197 					// printf("bad: tsolen = %d", tsolen);
198 					return (1);
199 				}
200 				curseg = segs[i].ds_len;
201 			}
202 			seglen = min(curseg, segsz);
203 			segsz -= seglen;
204 			curseg -= seglen;
205 			tsolen -= seglen;
206 			// printf("D:seglen = %d, count=%d\n", seglen, count);
207 		}
208 		count = 0;
209 	}
210 
211  	return (0);
212 }
213 
214 /*********************************************************************
215  *
216  *  Setup descriptor for hw offloads
217  *
218  **********************************************************************/
219 
220 static void
221 ixl_tx_setup_offload(struct ixl_tx_queue *que,
222     if_pkt_info_t pi, u32 *cmd, u32 *off)
223 {
224 	switch (pi->ipi_etype) {
225 #ifdef INET
226 		case ETHERTYPE_IP:
227 			if (pi->ipi_csum_flags & IXL_CSUM_IPV4)
228 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
229 			else
230 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
231 			break;
232 #endif
233 #ifdef INET6
234 		case ETHERTYPE_IPV6:
235 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
236 			break;
237 #endif
238 		default:
239 			break;
240 	}
241 
242 	*off |= (pi->ipi_ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
243 	*off |= (pi->ipi_ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
244 
245 	switch (pi->ipi_ipproto) {
246 		case IPPROTO_TCP:
247 			if (pi->ipi_csum_flags & IXL_CSUM_TCP) {
248 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
249 				*off |= (pi->ipi_tcp_hlen >> 2) <<
250 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
251 				/* Check for NO_HEAD MDD event */
252 				MPASS(pi->ipi_tcp_hlen != 0);
253 			}
254 			break;
255 		case IPPROTO_UDP:
256 			if (pi->ipi_csum_flags & IXL_CSUM_UDP) {
257 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
258 				*off |= (sizeof(struct udphdr) >> 2) <<
259 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
260 			}
261 			break;
262 		case IPPROTO_SCTP:
263 			if (pi->ipi_csum_flags & IXL_CSUM_SCTP) {
264 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
265 				*off |= (sizeof(struct sctphdr) >> 2) <<
266 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
267 			}
268 			/* Fall Thru */
269 		default:
270 			break;
271 	}
272 }
273 
274 /**********************************************************************
275  *
276  *  Setup context for hardware segmentation offload (TSO)
277  *
278  **********************************************************************/
279 static int
280 ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
281 {
282 	if_softc_ctx_t			scctx;
283 	struct i40e_tx_context_desc	*TXD;
284 	u32				cmd, mss, type, tsolen;
285 	int				idx, total_hdr_len;
286 	u64				type_cmd_tso_mss;
287 
288 	idx = pi->ipi_pidx;
289 	TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
290 	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
291 	tsolen = pi->ipi_len - total_hdr_len;
292 	scctx = txr->que->vsi->shared;
293 
294 	type = I40E_TX_DESC_DTYPE_CONTEXT;
295 	cmd = I40E_TX_CTX_DESC_TSO;
296 	/*
297 	 * TSO MSS must not be less than 64; this prevents a
298 	 * BAD_LSO_MSS MDD event when the MSS is too small.
299 	 */
300 	if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
301 		txr->mss_too_small++;
302 		pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
303 	}
304 	mss = pi->ipi_tso_segsz;
305 
306 	/* Check for BAD_LS0_MSS MDD event (mss too large) */
307 	MPASS(mss <= IXL_MAX_TSO_MSS);
308 	/* Check for NO_HEAD MDD event (header lengths are 0) */
309 	MPASS(pi->ipi_ehdrlen != 0);
310 	MPASS(pi->ipi_ip_hlen != 0);
311 	/* Partial check for BAD_LSO_LEN MDD event */
312 	MPASS(tsolen != 0);
313 	/* Partial check for WRONG_SIZE MDD event (during TSO) */
314 	MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
315 
316 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
317 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
318 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
319 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
320 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
321 
322 	TXD->tunneling_params = htole32(0);
323 	txr->que->tso++;
324 
325 	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
326 }
327 
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *  	- return 0 on success, positive on failure
 *
 **********************************************************************/
335 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
336 
337 static int
338 ixl_isc_txd_encap(void *arg, if_pkt_info_t pi)
339 {
340 	struct ixl_vsi		*vsi = arg;
341 	if_softc_ctx_t		scctx = vsi->shared;
342 	struct ixl_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
343  	struct tx_ring		*txr = &que->txr;
344 	int			nsegs = pi->ipi_nsegs;
345 	bus_dma_segment_t *segs = pi->ipi_segs;
346 	struct i40e_tx_desc	*txd = NULL;
347 	int             	i, j, mask, pidx_last;
348 	u32			cmd, off, tx_intr;
349 
350 	cmd = off = 0;
351 	i = pi->ipi_pidx;
352 
353 	tx_intr = (pi->ipi_flags & IPI_TX_INTR);
354 
355 	/* Set up the TSO/CSUM offload */
356 	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
357 		/* Set up the TSO context descriptor if required */
358 		if (pi->ipi_csum_flags & CSUM_TSO) {
359 			/* Prevent MAX_BUFF MDD event (for TSO) */
360 			if (ixl_tso_detect_sparse(segs, nsegs, pi))
361 				return (EFBIG);
362 			i = ixl_tso_setup(txr, pi);
363 		}
364 		ixl_tx_setup_offload(que, pi, &cmd, &off);
365 	}
366 	if (pi->ipi_mflags & M_VLANTAG)
367 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
368 
369 	cmd |= I40E_TX_DESC_CMD_ICRC;
370 	mask = scctx->isc_ntxd[0] - 1;
371 	/* Check for WRONG_SIZE MDD event */
372 	MPASS(pi->ipi_len >= IXL_MIN_FRAME);
373 #ifdef INVARIANTS
374 	if (!(pi->ipi_csum_flags & CSUM_TSO))
375 		MPASS(pi->ipi_len <= IXL_MAX_FRAME);
376 #endif
377 	for (j = 0; j < nsegs; j++) {
378 		bus_size_t seglen;
379 
380 		txd = &txr->tx_base[i];
381 		seglen = segs[j].ds_len;
382 
383 		/* Check for ZERO_BSIZE MDD event */
384 		MPASS(seglen != 0);
385 
386 		txd->buffer_addr = htole64(segs[j].ds_addr);
387 		txd->cmd_type_offset_bsz =
388 		    htole64(I40E_TX_DESC_DTYPE_DATA
389 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
390 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
391 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
392 	            | ((u64)htole16(pi->ipi_vtag) << I40E_TXD_QW1_L2TAG1_SHIFT));
393 
394 		txr->tx_bytes += seglen;
395 		pidx_last = i;
396 		i = (i+1) & mask;
397 	}
398 	/* Set the last descriptor for report */
399 	txd->cmd_type_offset_bsz |=
400 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
401 	/* Add to report status array (if using TX interrupts) */
402 	if (!vsi->enable_head_writeback && tx_intr) {
403 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
404 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
405 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
406  	}
407 	pi->ipi_new_pidx = i;
408 
409 	++txr->tx_packets;
410 	return (0);
411 }
412 
413 static void
414 ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
415 {
416 	struct ixl_vsi *vsi = arg;
417 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
418 
419  	/*
420 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
421 	 * hardware that this frame is available to transmit.
422  	 */
423 	/* Check for ENDLESS_TX MDD event */
424 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
425 	wr32(vsi->hw, txr->tail, pidx);
426 }
427 
428 
429 /*********************************************************************
430  *
431  *  (Re)Initialize a queue transmit ring by clearing its memory.
432  *
433  **********************************************************************/
434 void
435 ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que)
436 {
437 	struct tx_ring *txr = &que->txr;
438 
439 	/* Clear the old ring contents */
440 	bzero((void *)txr->tx_base,
441 	      (sizeof(struct i40e_tx_desc)) *
442 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
443 
444 	wr32(vsi->hw, txr->tail, 0);
445 }
446 
447 /*
448  * ixl_get_tx_head - Retrieve the value from the
449  *    location the HW records its HEAD index
450  */
451 static inline u32
452 ixl_get_tx_head(struct ixl_tx_queue *que)
453 {
454 	if_softc_ctx_t          scctx = que->vsi->shared;
455 	struct tx_ring  *txr = &que->txr;
456 	void *head = &txr->tx_base[scctx->isc_ntxd[0]];
457 
458 	return LE32_TO_CPU(*(volatile __le32 *)head);
459 }
460 
461 static int
462 ixl_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
463 {
464 	struct ixl_vsi          *vsi = arg;
465 	if_softc_ctx_t          scctx = vsi->shared;
466 	struct ixl_tx_queue     *que = &vsi->tx_queues[qid];
467 	struct tx_ring		*txr = &que->txr;
468 	int			 head, credits;
469 
470 	/* Get the Head WB value */
471 	head = ixl_get_tx_head(que);
472 
473 	credits = head - txr->tx_cidx_processed;
474 	if (credits < 0)
475 		credits += scctx->isc_ntxd[0];
476 	if (clear)
477 		txr->tx_cidx_processed = head;
478 
479 	return (credits);
480 }
481 
482 static int
483 ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
484 {
485 	struct ixl_vsi *vsi = arg;
486 	struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid];
487 	if_softc_ctx_t scctx = vsi->shared;
488 	struct tx_ring *txr = &tx_que->txr;
489 
490 	qidx_t processed = 0;
491 	qidx_t cur, prev, ntxd, rs_cidx;
492 	int32_t delta;
493 	bool is_done;
494 
495 	rs_cidx = txr->tx_rs_cidx;
496 #if 0
497 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) rs_cidx %d, txr->tx_rs_pidx %d\n", __func__,
498 	    txr->me, rs_cidx, txr->tx_rs_pidx);
499 #endif
500 	if (rs_cidx == txr->tx_rs_pidx)
501 		return (0);
502 	cur = txr->tx_rsq[rs_cidx];
503 	MPASS(cur != QIDX_INVALID);
504 	is_done = ixl_is_tx_desc_done(txr, cur);
505 
506 	if (!is_done)
507 		return (0);
508 
509 	/* If clear is false just let caller know that there
510 	 * are descriptors to reclaim */
511 	if (!clear)
512 		return (1);
513 
514 	prev = txr->tx_cidx_processed;
515 	ntxd = scctx->isc_ntxd[0];
516 	do {
517 		MPASS(prev != cur);
518 		delta = (int32_t)cur - (int32_t)prev;
519 		if (delta < 0)
520 			delta += ntxd;
521 		MPASS(delta > 0);
522 #if 0
523 		device_printf(iflib_get_dev(vsi->ctx),
524 			      "%s: (q%d) cidx_processed=%u cur=%u clear=%d delta=%d\n",
525 			      __func__, txr->me, prev, cur, clear, delta);
526 #endif
527 		processed += delta;
528 		prev = cur;
529 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
530 		if (rs_cidx == txr->tx_rs_pidx)
531 			break;
532 		cur = txr->tx_rsq[rs_cidx];
533 		MPASS(cur != QIDX_INVALID);
534 		is_done = ixl_is_tx_desc_done(txr, cur);
535 	} while (is_done);
536 
537 	txr->tx_rs_cidx = rs_cidx;
538 	txr->tx_cidx_processed = prev;
539 
540 #if 0
541 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) processed %d\n", __func__, txr->me, processed);
542 #endif
543 	return (processed);
544 }
545 
546 static void
547 ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru)
548 {
549 	struct ixl_vsi *vsi = arg;
550 	if_softc_ctx_t scctx = vsi->shared;
551 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
552 	uint64_t *paddrs;
553 	uint32_t next_pidx, pidx;
554 	uint16_t count;
555 	int i;
556 
557 	paddrs = iru->iru_paddrs;
558 	pidx = iru->iru_pidx;
559 	count = iru->iru_count;
560 
561 	for (i = 0, next_pidx = pidx; i < count; i++) {
562 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
563 		if (++next_pidx == scctx->isc_nrxd[0])
564 			next_pidx = 0;
565  	}
566 }
567 
568 static void
569 ixl_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
570 {
571 	struct ixl_vsi		*vsi = arg;
572 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
573 
574 	wr32(vsi->hw, rxr->tail, pidx);
575 }
576 
577 static int
578 ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
579 {
580 	struct ixl_vsi *vsi = arg;
581 	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
582 	union i40e_rx_desc *rxd;
583 	u64 qword;
584 	uint32_t status;
585 	int cnt, i, nrxd;
586 
587 	nrxd = vsi->shared->isc_nrxd[0];
588 
589 	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
590 		rxd = &rxr->rx_base[i];
591 		qword = le64toh(rxd->wb.qword1.status_error_len);
592 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
593 			>> I40E_RXD_QW1_STATUS_SHIFT;
594 
595 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
596 			break;
597 		if (++i == nrxd)
598 			i = 0;
599 		if (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT))
600 			cnt++;
601 	}
602 
603 	return (cnt);
604 }
605 
606 /*
607 ** i40e_ptype_to_hash: parse the packet type
608 ** to determine the appropriate hash.
609 */
610 static inline int
611 ixl_ptype_to_hash(u8 ptype)
612 {
613         struct i40e_rx_ptype_decoded	decoded;
614 
615 	decoded = decode_rx_desc_ptype(ptype);
616 
617 	if (!decoded.known)
618 		return M_HASHTYPE_OPAQUE;
619 
620 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
621 		return M_HASHTYPE_OPAQUE;
622 
623 	/* Note: anything that gets to this point is IP */
624         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
625 		switch (decoded.inner_prot) {
626 		case I40E_RX_PTYPE_INNER_PROT_TCP:
627 			return M_HASHTYPE_RSS_TCP_IPV6;
628 		case I40E_RX_PTYPE_INNER_PROT_UDP:
629 			return M_HASHTYPE_RSS_UDP_IPV6;
630 		default:
631 			return M_HASHTYPE_RSS_IPV6;
632 		}
633 	}
634         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
635 		switch (decoded.inner_prot) {
636 		case I40E_RX_PTYPE_INNER_PROT_TCP:
637 			return M_HASHTYPE_RSS_TCP_IPV4;
638 		case I40E_RX_PTYPE_INNER_PROT_UDP:
639 			return M_HASHTYPE_RSS_UDP_IPV4;
640 		default:
641 			return M_HASHTYPE_RSS_IPV4;
642 		}
643 	}
644 	/* We should never get here!! */
645 	return M_HASHTYPE_OPAQUE;
646 }
647 
648 /*********************************************************************
649  *
650  *  This routine executes in ithread context. It sends data which has been
651  *  dma'ed into host memory to upper layer.
652  *
653  *  Returns 0 upon success, errno on failure
654  *
655  *********************************************************************/
656 static int
657 ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
658 {
659 	struct ixl_vsi		*vsi = arg;
660 	if_softc_ctx_t		scctx = vsi->shared;
661 	struct ixl_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
662 	struct rx_ring		*rxr = &que->rxr;
663 	union i40e_rx_desc	*cur;
664 	u32		status, error;
665 	u16		plen;
666 	u64		qword;
667 	u8		ptype;
668 	bool		eop;
669 	int i, cidx;
670 
671 	cidx = ri->iri_cidx;
672 	i = 0;
673 	do {
674 		/* 5 descriptor receive limit */
675 		MPASS(i < IXL_MAX_RX_SEGS);
676 
677 		cur = &rxr->rx_base[cidx];
678 		qword = le64toh(cur->wb.qword1.status_error_len);
679 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
680 		    >> I40E_RXD_QW1_STATUS_SHIFT;
681 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
682 		    >> I40E_RXD_QW1_ERROR_SHIFT;
683 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
684 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
685 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
686 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
687 
688 		/* we should never be called without a valid descriptor */
689 		MPASS((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0);
690 
691 		ri->iri_len += plen;
692 		rxr->rx_bytes += plen;
693 
694 		cur->wb.qword1.status_error_len = 0;
695 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
696 
697 		/*
698 		** Make sure bad packets are discarded,
699 		** note that only EOP descriptor has valid
700 		** error results.
701 		*/
702 		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
703 			rxr->desc_errs++;
704 			return (EBADMSG);
705 		}
706 		ri->iri_frags[i].irf_flid = 0;
707 		ri->iri_frags[i].irf_idx = cidx;
708 		ri->iri_frags[i].irf_len = plen;
709 		if (++cidx == vsi->shared->isc_nrxd[0])
710 			cidx = 0;
711 		i++;
712 	} while (!eop);
713 
714 	/* capture data for dynamic ITR adjustment */
715 	rxr->packets++;
716 	rxr->rx_packets++;
717 
718 	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
719 		rxr->csum_errs += ixl_rx_checksum(ri, status, error, ptype);
720 	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
721 	ri->iri_rsstype = ixl_ptype_to_hash(ptype);
722 	if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
723 		ri->iri_vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
724 		ri->iri_flags |= M_VLANTAG;
725 	}
726 	ri->iri_nfrags = i;
727 	return (0);
728 }
729 
730 /*********************************************************************
731  *
732  *  Verify that the hardware indicated that the checksum is valid.
733  *  Inform the stack about the status of checksum so that stack
734  *  doesn't spend time verifying the checksum.
735  *
736  *********************************************************************/
737 static u8
738 ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
739 {
740 	struct i40e_rx_ptype_decoded decoded;
741 
742 	ri->iri_csum_flags = 0;
743 
744 	/* No L3 or L4 checksum was calculated */
745 	if (!(status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
746 		return (0);
747 
748 	decoded = decode_rx_desc_ptype(ptype);
749 
750 	/* IPv6 with extension headers likely have bad csum */
751 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
752 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
753 		if (status &
754 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
755 			ri->iri_csum_flags = 0;
756 			return (1);
757 		}
758 	}
759 
760 	ri->iri_csum_flags |= CSUM_L3_CALC;
761 
762 	/* IPv4 checksum error */
763 	if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
764 		return (1);
765 
766 	ri->iri_csum_flags |= CSUM_L3_VALID;
767 	ri->iri_csum_flags |= CSUM_L4_CALC;
768 
769 	/* L4 checksum error */
770 	if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
771 		return (1);
772 
773 	ri->iri_csum_flags |= CSUM_L4_VALID;
774 	ri->iri_csum_data |= htons(0xffff);
775 
776 	return (0);
777 }
778 
779 /* Set Report Status queue fields to 0 */
780 void
781 ixl_init_tx_rsqs(struct ixl_vsi *vsi)
782 {
783 	if_softc_ctx_t scctx = vsi->shared;
784 	struct ixl_tx_queue *tx_que;
785 	int i, j;
786 
787 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
788 		struct tx_ring *txr = &tx_que->txr;
789 
790 		txr->tx_rs_cidx = txr->tx_rs_pidx;
791 
792 		/* Initialize the last processed descriptor to be the end of
793 		 * the ring, rather than the start, so that we avoid an
794 		 * off-by-one error when calculating how many descriptors are
795 		 * done in the credits_update function.
796 		 */
797 		txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1;
798 
799 		for (j = 0; j < scctx->isc_ntxd[0]; j++)
800 			txr->tx_rsq[j] = QIDX_INVALID;
801 	}
802 }
803 
804 void
805 ixl_init_tx_cidx(struct ixl_vsi *vsi)
806 {
807 	if_softc_ctx_t scctx = vsi->shared;
808 	struct ixl_tx_queue *tx_que;
809 	int i;
810 
811 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
812 		struct tx_ring *txr = &tx_que->txr;
813 
814 		txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1;
815 	}
816 }
817 
818 /*
819  * Input: bitmap of enum virtchnl_link_speed
820  */
821 u64
822 ixl_max_vc_speed_to_value(u8 link_speeds)
823 {
824 	if (link_speeds & VIRTCHNL_LINK_SPEED_40GB)
825 		return IF_Gbps(40);
826 	if (link_speeds & VIRTCHNL_LINK_SPEED_25GB)
827 		return IF_Gbps(25);
828 	if (link_speeds & VIRTCHNL_LINK_SPEED_20GB)
829 		return IF_Gbps(20);
830 	if (link_speeds & VIRTCHNL_LINK_SPEED_10GB)
831 		return IF_Gbps(10);
832 	if (link_speeds & VIRTCHNL_LINK_SPEED_1GB)
833 		return IF_Gbps(1);
834 	if (link_speeds & VIRTCHNL_LINK_SPEED_100MB)
835 		return IF_Mbps(100);
836 	else
837 		/* Minimum supported link speed */
838 		return IF_Mbps(100);
839 }
840 
841 void
842 ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi,
843     struct sysctl_ctx_list *ctx, const char *sysctl_name)
844 {
845 	struct sysctl_oid *tree;
846 	struct sysctl_oid_list *child;
847 	struct sysctl_oid_list *vsi_list;
848 
849 	tree = device_get_sysctl_tree(dev);
850 	child = SYSCTL_CHILDREN(tree);
851 	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
852 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "VSI Number");
853 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
854 
855 	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
856 }
857 
858 void
859 ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
860 	struct sysctl_oid_list *child,
861 	struct i40e_eth_stats *eth_stats)
862 {
863 	struct ixl_sysctl_info ctls[] =
864 	{
865 		{&eth_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
866 		{&eth_stats->rx_unicast, "ucast_pkts_rcvd",
867 			"Unicast Packets Received"},
868 		{&eth_stats->rx_multicast, "mcast_pkts_rcvd",
869 			"Multicast Packets Received"},
870 		{&eth_stats->rx_broadcast, "bcast_pkts_rcvd",
871 			"Broadcast Packets Received"},
872 		{&eth_stats->rx_discards, "rx_discards", "Discarded RX packets"},
873 		{&eth_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
874 		{&eth_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"},
875 		{&eth_stats->tx_multicast, "mcast_pkts_txd",
876 			"Multicast Packets Transmitted"},
877 		{&eth_stats->tx_broadcast, "bcast_pkts_txd",
878 			"Broadcast Packets Transmitted"},
879 		// end
880 		{0,0,0}
881 	};
882 
883 	struct ixl_sysctl_info *entry = ctls;
884 	while (entry->stat != 0)
885 	{
886 		SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name,
887 				CTLFLAG_RD, entry->stat,
888 				entry->description);
889 		entry++;
890 	}
891 }
892 
893 void
894 ixl_vsi_add_queues_stats(struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx)
895 {
896 	struct sysctl_oid_list *vsi_list, *queue_list;
897 	struct sysctl_oid *queue_node;
898 	char queue_namebuf[IXL_QUEUE_NAME_LEN];
899 
900 	struct ixl_rx_queue *rx_que;
901 	struct ixl_tx_queue *tx_que;
902 	struct tx_ring *txr;
903 	struct rx_ring *rxr;
904 
905 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
906 
907 	/* Queue statistics */
908 	for (int q = 0; q < vsi->num_rx_queues; q++) {
909 		bzero(queue_namebuf, sizeof(queue_namebuf));
910 		snprintf(queue_namebuf, sizeof(queue_namebuf), "rxq%02d", q);
911 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
912 		    OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE,
913 		    NULL, "RX Queue #");
914 		queue_list = SYSCTL_CHILDREN(queue_node);
915 
916 		rx_que = &(vsi->rx_queues[q]);
917 		rxr = &(rx_que->rxr);
918 
919 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
920 				CTLFLAG_RD, &(rx_que->irqs),
921 				"irqs on this queue (both Tx and Rx)");
922 
923 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
924 				CTLFLAG_RD, &(rxr->rx_packets),
925 				"Queue Packets Received");
926 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
927 				CTLFLAG_RD, &(rxr->rx_bytes),
928 				"Queue Bytes Received");
929 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "desc_err",
930 				CTLFLAG_RD, &(rxr->desc_errs),
931 				"Queue Rx Descriptor Errors");
932 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
933 				CTLFLAG_RD, &(rxr->itr), 0,
934 				"Queue Rx ITR Interval");
935 	}
936 	for (int q = 0; q < vsi->num_tx_queues; q++) {
937 		bzero(queue_namebuf, sizeof(queue_namebuf));
938 		snprintf(queue_namebuf, sizeof(queue_namebuf), "txq%02d", q);
939 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
940 		    OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE,
941 		    NULL, "TX Queue #");
942 		queue_list = SYSCTL_CHILDREN(queue_node);
943 
944 		tx_que = &(vsi->tx_queues[q]);
945 		txr = &(tx_que->txr);
946 
947 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
948 				CTLFLAG_RD, &(tx_que->tso),
949 				"TSO");
950 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
951 				CTLFLAG_RD, &(txr->mss_too_small),
952 				"TSO sends with an MSS less than 64");
953 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
954 				CTLFLAG_RD, &(txr->tx_packets),
955 				"Queue Packets Transmitted");
956 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
957 				CTLFLAG_RD, &(txr->tx_bytes),
958 				"Queue Bytes Transmitted");
959 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
960 				CTLFLAG_RD, &(txr->itr), 0,
961 				"Queue Tx ITR Interval");
962 	}
963 }
964