xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision 752d135e0dacd9a463d24ffb89779b67ce0a7ea0)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2018, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
**	    This was separated to allow usage by
38 ** 	    both the PF and VF drivers.
39 */
40 
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46 
47 #include "ixl.h"
48 
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
/*
 * Local prototypes.
 *
 * Everything declared here is file-local.  The ixl_isc_* routines are the
 * callbacks placed in the ixl_txrx_hwb / ixl_txrx_dwb tables below.
 */
/* Translate RX descriptor status/error bits into checksum flags in ri. */
static void	ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype);

/* TX side: descriptor encapsulation, tail update and completion accounting. */
static int	ixl_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void	ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int	ixl_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear);
static int	ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear);

/* RX side: buffer refill, tail update, availability check, packet fetch. */
static void	ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void	ixl_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
				  qidx_t pidx);
static int	ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
				      qidx_t budget);
static int	ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
67 
/*
 * TX/RX callback table whose TX completion accounting is done by
 * ixl_isc_txd_credits_update_hwb(), i.e. by reading the head index the
 * hardware writes back past the end of the descriptor ring.  Every other
 * entry is the same as in ixl_txrx_dwb.
 *
 * The initializer is positional, so the order here must match the member
 * order of struct if_txrx (declared outside this file).
 */
struct if_txrx ixl_txrx_hwb = {
	ixl_isc_txd_encap,
	ixl_isc_txd_flush,
	ixl_isc_txd_credits_update_hwb,
	ixl_isc_rxd_available,
	ixl_isc_rxd_pkt_get,
	ixl_isc_rxd_refill,
	ixl_isc_rxd_flush,
	NULL	/* final member not provided; NOTE(review): confirm which hook this is */
};
78 
/*
 * TX/RX callback table whose TX completion accounting is done by
 * ixl_isc_txd_credits_update_dwb(), i.e. by checking the DTYPE field of
 * the descriptors recorded in the report-status queue.  Every other
 * entry is the same as in ixl_txrx_hwb.
 *
 * The initializer is positional, so the order here must match the member
 * order of struct if_txrx (declared outside this file).
 */
struct if_txrx ixl_txrx_dwb = {
	ixl_isc_txd_encap,
	ixl_isc_txd_flush,
	ixl_isc_txd_credits_update_dwb,
	ixl_isc_rxd_available,
	ixl_isc_rxd_pkt_get,
	ixl_isc_rxd_refill,
	ixl_isc_rxd_flush,
	NULL	/* final member not provided; NOTE(review): confirm which hook this is */
};
89 
90 /*
91  * @key key is saved into this parameter
92  */
93 void
94 ixl_get_default_rss_key(u32 *key)
95 {
96 	MPASS(key != NULL);
97 
98 	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
99 	    0x183cfd8c, 0xce880440, 0x580cbc3c,
100 	    0x35897377, 0x328b25e1, 0x4fa98922,
101 	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
102 	    0x0, 0x0, 0x0};
103 
104 	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
105 }
106 
107 /**
108  * i40e_vc_stat_str - convert virtchnl status err code to a string
109  * @hw: pointer to the HW structure
110  * @stat_err: the status error code to convert
111  **/
112 const char *
113 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
114 {
115 	switch (stat_err) {
116 	case VIRTCHNL_STATUS_SUCCESS:
117 		return "OK";
118 	case VIRTCHNL_ERR_PARAM:
119 		return "VIRTCHNL_ERR_PARAM";
120 	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
121 		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
122 	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
123 		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
124 	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
125 		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
126 	case VIRTCHNL_STATUS_NOT_SUPPORTED:
127 		return "VIRTCHNL_STATUS_NOT_SUPPORTED";
128 	}
129 
130 	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
131 	return hw->err_str;
132 }
133 
134 void
135 ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...)
136 {
137 	va_list args;
138 
139 	if (!(mask & enabled_mask))
140 		return;
141 
142 	/* Re-implement device_printf() */
143 	device_print_prettyname(dev);
144 	va_start(args, fmt);
145 	vprintf(fmt, args);
146 	va_end(args);
147 }
148 
149 static bool
150 ixl_is_tx_desc_done(struct tx_ring *txr, int idx)
151 {
152 	return (((txr->tx_base[idx].cmd_type_offset_bsz >> I40E_TXD_QW1_DTYPE_SHIFT)
153 	    & I40E_TXD_QW1_DTYPE_MASK) == I40E_TX_DESC_DTYPE_DESC_DONE);
154 }
155 
156 static int
157 ixl_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi)
158 {
159 	int	count, curseg, i, hlen, segsz, seglen, tsolen;
160 
161 	if (nsegs <= IXL_MAX_TX_SEGS-2)
162 		return (0);
163 	segsz = pi->ipi_tso_segsz;
164 	curseg = count = 0;
165 
166 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
167 	tsolen = pi->ipi_len - hlen;
168 
169 	i = 0;
170 	curseg = segs[0].ds_len;
171 	while (hlen > 0) {
172 		count++;
173 		if (count > IXL_MAX_TX_SEGS - 2)
174 			return (1);
175 		if (curseg == 0) {
176 			i++;
177 			if (__predict_false(i == nsegs))
178 				return (1);
179 
180 			curseg = segs[i].ds_len;
181 		}
182 		seglen = min(curseg, hlen);
183 		curseg -= seglen;
184 		hlen -= seglen;
185 		// printf("H:seglen = %d, count=%d\n", seglen, count);
186 	}
187 	while (tsolen > 0) {
188 		segsz = pi->ipi_tso_segsz;
189 		while (segsz > 0 && tsolen != 0) {
190 			count++;
191 			if (count > IXL_MAX_TX_SEGS - 2) {
192 				// printf("bad: count = %d\n", count);
193 				return (1);
194 			}
195 			if (curseg == 0) {
196 				i++;
197 				if (__predict_false(i == nsegs)) {
198 					// printf("bad: tsolen = %d", tsolen);
199 					return (1);
200 				}
201 				curseg = segs[i].ds_len;
202 			}
203 			seglen = min(curseg, segsz);
204 			segsz -= seglen;
205 			curseg -= seglen;
206 			tsolen -= seglen;
207 			// printf("D:seglen = %d, count=%d\n", seglen, count);
208 		}
209 		count = 0;
210 	}
211 
212  	return (0);
213 }
214 
215 /*********************************************************************
216  *
217  *  Setup descriptor for hw offloads
218  *
219  **********************************************************************/
220 
221 static void
222 ixl_tx_setup_offload(struct ixl_tx_queue *que,
223     if_pkt_info_t pi, u32 *cmd, u32 *off)
224 {
225 	switch (pi->ipi_etype) {
226 #ifdef INET
227 		case ETHERTYPE_IP:
228 			if (pi->ipi_csum_flags & IXL_CSUM_IPV4)
229 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
230 			else
231 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
232 			break;
233 #endif
234 #ifdef INET6
235 		case ETHERTYPE_IPV6:
236 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
237 			break;
238 #endif
239 		default:
240 			break;
241 	}
242 
243 	*off |= (pi->ipi_ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
244 	*off |= (pi->ipi_ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
245 
246 	switch (pi->ipi_ipproto) {
247 		case IPPROTO_TCP:
248 			if (pi->ipi_csum_flags & IXL_CSUM_TCP) {
249 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
250 				*off |= (pi->ipi_tcp_hlen >> 2) <<
251 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
252 				/* Check for NO_HEAD MDD event */
253 				MPASS(pi->ipi_tcp_hlen != 0);
254 			}
255 			break;
256 		case IPPROTO_UDP:
257 			if (pi->ipi_csum_flags & IXL_CSUM_UDP) {
258 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
259 				*off |= (sizeof(struct udphdr) >> 2) <<
260 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
261 			}
262 			break;
263 		case IPPROTO_SCTP:
264 			if (pi->ipi_csum_flags & IXL_CSUM_SCTP) {
265 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
266 				*off |= (sizeof(struct sctphdr) >> 2) <<
267 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
268 			}
269 			/* Fall Thru */
270 		default:
271 			break;
272 	}
273 }
274 
275 /**********************************************************************
276  *
277  *  Setup context for hardware segmentation offload (TSO)
278  *
279  **********************************************************************/
280 static int
281 ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
282 {
283 	if_softc_ctx_t			scctx;
284 	struct i40e_tx_context_desc	*TXD;
285 	u32				cmd, mss, type, tsolen;
286 	int				idx, total_hdr_len;
287 	u64				type_cmd_tso_mss;
288 
289 	idx = pi->ipi_pidx;
290 	TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
291 	total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
292 	tsolen = pi->ipi_len - total_hdr_len;
293 	scctx = txr->que->vsi->shared;
294 
295 	type = I40E_TX_DESC_DTYPE_CONTEXT;
296 	cmd = I40E_TX_CTX_DESC_TSO;
297 	/*
298 	 * TSO MSS must not be less than 64; this prevents a
299 	 * BAD_LSO_MSS MDD event when the MSS is too small.
300 	 */
301 	if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
302 		txr->mss_too_small++;
303 		pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
304 	}
305 	mss = pi->ipi_tso_segsz;
306 
307 	/* Check for BAD_LS0_MSS MDD event (mss too large) */
308 	MPASS(mss <= IXL_MAX_TSO_MSS);
309 	/* Check for NO_HEAD MDD event (header lengths are 0) */
310 	MPASS(pi->ipi_ehdrlen != 0);
311 	MPASS(pi->ipi_ip_hlen != 0);
312 	/* Partial check for BAD_LSO_LEN MDD event */
313 	MPASS(tsolen != 0);
314 	/* Partial check for WRONG_SIZE MDD event (during TSO) */
315 	MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
316 
317 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
318 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
319 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
320 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
321 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
322 
323 	TXD->tunneling_params = htole32(0);
324 	txr->que->tso++;
325 
326 	return ((idx + 1) & (scctx->isc_ntxd[0]-1));
327 }
328 
329 /*********************************************************************
330   *
331  *  This routine maps the mbufs to tx descriptors, allowing the
332  *  TX engine to transmit the packets.
333  *  	- return 0 on success, positive on failure
334   *
335   **********************************************************************/
336 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
337 
338 static int
339 ixl_isc_txd_encap(void *arg, if_pkt_info_t pi)
340 {
341 	struct ixl_vsi		*vsi = arg;
342 	if_softc_ctx_t		scctx = vsi->shared;
343 	struct ixl_tx_queue	*que = &vsi->tx_queues[pi->ipi_qsidx];
344  	struct tx_ring		*txr = &que->txr;
345 	int			nsegs = pi->ipi_nsegs;
346 	bus_dma_segment_t *segs = pi->ipi_segs;
347 	struct i40e_tx_desc	*txd = NULL;
348 	int             	i, j, mask, pidx_last;
349 	u32			cmd, off, tx_intr;
350 
351 	cmd = off = 0;
352 	i = pi->ipi_pidx;
353 
354 	tx_intr = (pi->ipi_flags & IPI_TX_INTR);
355 
356 	/* Set up the TSO/CSUM offload */
357 	if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
358 		/* Set up the TSO context descriptor if required */
359 		if (pi->ipi_csum_flags & CSUM_TSO) {
360 			/* Prevent MAX_BUFF MDD event (for TSO) */
361 			if (ixl_tso_detect_sparse(segs, nsegs, pi))
362 				return (EFBIG);
363 			i = ixl_tso_setup(txr, pi);
364 		}
365 		ixl_tx_setup_offload(que, pi, &cmd, &off);
366 	}
367 	if (pi->ipi_mflags & M_VLANTAG)
368 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
369 
370 	cmd |= I40E_TX_DESC_CMD_ICRC;
371 	mask = scctx->isc_ntxd[0] - 1;
372 	/* Check for WRONG_SIZE MDD event */
373 	MPASS(pi->ipi_len >= IXL_MIN_FRAME);
374 #ifdef INVARIANTS
375 	if (!(pi->ipi_csum_flags & CSUM_TSO))
376 		MPASS(pi->ipi_len <= IXL_MAX_FRAME);
377 #endif
378 	for (j = 0; j < nsegs; j++) {
379 		bus_size_t seglen;
380 
381 		txd = &txr->tx_base[i];
382 		seglen = segs[j].ds_len;
383 
384 		/* Check for ZERO_BSIZE MDD event */
385 		MPASS(seglen != 0);
386 
387 		txd->buffer_addr = htole64(segs[j].ds_addr);
388 		txd->cmd_type_offset_bsz =
389 		    htole64(I40E_TX_DESC_DTYPE_DATA
390 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
391 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
392 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
393 	            | ((u64)htole16(pi->ipi_vtag) << I40E_TXD_QW1_L2TAG1_SHIFT));
394 
395 		txr->tx_bytes += seglen;
396 		pidx_last = i;
397 		i = (i+1) & mask;
398 	}
399 	/* Set the last descriptor for report */
400 	txd->cmd_type_offset_bsz |=
401 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
402 	/* Add to report status array (if using TX interrupts) */
403 	if (!vsi->enable_head_writeback && tx_intr) {
404 		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
405 		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask;
406 		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
407  	}
408 	pi->ipi_new_pidx = i;
409 
410 	++txr->tx_packets;
411 	return (0);
412 }
413 
414 static void
415 ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
416 {
417 	struct ixl_vsi *vsi = arg;
418 	struct tx_ring *txr = &vsi->tx_queues[txqid].txr;
419 
420  	/*
421 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
422 	 * hardware that this frame is available to transmit.
423  	 */
424 	/* Check for ENDLESS_TX MDD event */
425 	MPASS(pidx < vsi->shared->isc_ntxd[0]);
426 	wr32(vsi->hw, txr->tail, pidx);
427 }
428 
429 
430 /*********************************************************************
431  *
432  *  (Re)Initialize a queue transmit ring by clearing its memory.
433  *
434  **********************************************************************/
435 void
436 ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que)
437 {
438 	struct tx_ring *txr = &que->txr;
439 
440 	/* Clear the old ring contents */
441 	bzero((void *)txr->tx_base,
442 	      (sizeof(struct i40e_tx_desc)) *
443 	      (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0)));
444 
445 	wr32(vsi->hw, txr->tail, 0);
446 }
447 
448 /*
449  * ixl_get_tx_head - Retrieve the value from the
450  *    location the HW records its HEAD index
451  */
452 static inline u32
453 ixl_get_tx_head(struct ixl_tx_queue *que)
454 {
455 	if_softc_ctx_t          scctx = que->vsi->shared;
456 	struct tx_ring  *txr = &que->txr;
457 	void *head = &txr->tx_base[scctx->isc_ntxd[0]];
458 
459 	return LE32_TO_CPU(*(volatile __le32 *)head);
460 }
461 
462 static int
463 ixl_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear)
464 {
465 	struct ixl_vsi          *vsi = arg;
466 	if_softc_ctx_t          scctx = vsi->shared;
467 	struct ixl_tx_queue     *que = &vsi->tx_queues[qid];
468 	struct tx_ring		*txr = &que->txr;
469 	int			 head, credits;
470 
471 	/* Get the Head WB value */
472 	head = ixl_get_tx_head(que);
473 
474 	credits = head - txr->tx_cidx_processed;
475 	if (credits < 0)
476 		credits += scctx->isc_ntxd[0];
477 	if (clear)
478 		txr->tx_cidx_processed = head;
479 
480 	return (credits);
481 }
482 
483 static int
484 ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear)
485 {
486 	struct ixl_vsi *vsi = arg;
487 	struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid];
488 	if_softc_ctx_t scctx = vsi->shared;
489 	struct tx_ring *txr = &tx_que->txr;
490 
491 	qidx_t processed = 0;
492 	qidx_t cur, prev, ntxd, rs_cidx;
493 	int32_t delta;
494 	bool is_done;
495 
496 	rs_cidx = txr->tx_rs_cidx;
497 #if 0
498 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) rs_cidx %d, txr->tx_rs_pidx %d\n", __func__,
499 	    txr->me, rs_cidx, txr->tx_rs_pidx);
500 #endif
501 	if (rs_cidx == txr->tx_rs_pidx)
502 		return (0);
503 	cur = txr->tx_rsq[rs_cidx];
504 	MPASS(cur != QIDX_INVALID);
505 	is_done = ixl_is_tx_desc_done(txr, cur);
506 
507 	if (!is_done)
508 		return (0);
509 
510 	/* If clear is false just let caller know that there
511 	 * are descriptors to reclaim */
512 	if (!clear)
513 		return (1);
514 
515 	prev = txr->tx_cidx_processed;
516 	ntxd = scctx->isc_ntxd[0];
517 	do {
518 		delta = (int32_t)cur - (int32_t)prev;
519 		MPASS(prev == 0 || delta != 0);
520 		if (delta < 0)
521 			delta += ntxd;
522 #if 0
523 		device_printf(iflib_get_dev(vsi->ctx),
524 			      "%s: (q%d) cidx_processed=%u cur=%u clear=%d delta=%d\n",
525 			      __func__, txr->me, prev, cur, clear, delta);
526 #endif
527 		processed += delta;
528 		prev = cur;
529 		rs_cidx = (rs_cidx + 1) & (ntxd-1);
530 		if (rs_cidx == txr->tx_rs_pidx)
531 			break;
532 		cur = txr->tx_rsq[rs_cidx];
533 		MPASS(cur != QIDX_INVALID);
534 		is_done = ixl_is_tx_desc_done(txr, cur);
535 	} while (is_done);
536 
537 	txr->tx_rs_cidx = rs_cidx;
538 	txr->tx_cidx_processed = prev;
539 
540 #if 0
541 	device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) processed %d\n", __func__, txr->me, processed);
542 #endif
543 	return (processed);
544 }
545 
546 static void
547 ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru)
548 {
549 	struct ixl_vsi *vsi = arg;
550 	if_softc_ctx_t scctx = vsi->shared;
551 	struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr);
552 	uint64_t *paddrs;
553 	uint32_t next_pidx, pidx;
554 	uint16_t count;
555 	int i;
556 
557 	paddrs = iru->iru_paddrs;
558 	pidx = iru->iru_pidx;
559 	count = iru->iru_count;
560 
561 	for (i = 0, next_pidx = pidx; i < count; i++) {
562 		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
563 		if (++next_pidx == scctx->isc_nrxd[0])
564 			next_pidx = 0;
565  	}
566 }
567 
568 static void
569 ixl_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
570 {
571 	struct ixl_vsi		*vsi = arg;
572 	struct rx_ring		*rxr = &vsi->rx_queues[rxqid].rxr;
573 
574 	wr32(vsi->hw, rxr->tail, pidx);
575 }
576 
577 static int
578 ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
579 {
580 	struct ixl_vsi *vsi = arg;
581 	struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr;
582 	union i40e_rx_desc *rxd;
583 	u64 qword;
584 	uint32_t status;
585 	int cnt, i, nrxd;
586 
587 	nrxd = vsi->shared->isc_nrxd[0];
588 
589 	for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) {
590 		rxd = &rxr->rx_base[i];
591 		qword = le64toh(rxd->wb.qword1.status_error_len);
592 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
593 			>> I40E_RXD_QW1_STATUS_SHIFT;
594 
595 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
596 			break;
597 		if (++i == nrxd)
598 			i = 0;
599 		if (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT))
600 			cnt++;
601 	}
602 
603 	return (cnt);
604 }
605 
606 /*
607 ** i40e_ptype_to_hash: parse the packet type
608 ** to determine the appropriate hash.
609 */
610 static inline int
611 ixl_ptype_to_hash(u8 ptype)
612 {
613         struct i40e_rx_ptype_decoded	decoded;
614 
615 	decoded = decode_rx_desc_ptype(ptype);
616 
617 	if (!decoded.known)
618 		return M_HASHTYPE_OPAQUE;
619 
620 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
621 		return M_HASHTYPE_OPAQUE;
622 
623 	/* Note: anything that gets to this point is IP */
624         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
625 		switch (decoded.inner_prot) {
626 		case I40E_RX_PTYPE_INNER_PROT_TCP:
627 			return M_HASHTYPE_RSS_TCP_IPV6;
628 		case I40E_RX_PTYPE_INNER_PROT_UDP:
629 			return M_HASHTYPE_RSS_UDP_IPV6;
630 		default:
631 			return M_HASHTYPE_RSS_IPV6;
632 		}
633 	}
634         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
635 		switch (decoded.inner_prot) {
636 		case I40E_RX_PTYPE_INNER_PROT_TCP:
637 			return M_HASHTYPE_RSS_TCP_IPV4;
638 		case I40E_RX_PTYPE_INNER_PROT_UDP:
639 			return M_HASHTYPE_RSS_UDP_IPV4;
640 		default:
641 			return M_HASHTYPE_RSS_IPV4;
642 		}
643 	}
644 	/* We should never get here!! */
645 	return M_HASHTYPE_OPAQUE;
646 }
647 
648 /*********************************************************************
649  *
650  *  This routine executes in ithread context. It sends data which has been
651  *  dma'ed into host memory to upper layer.
652  *
653  *  Returns 0 upon success, errno on failure
654  *
655  *********************************************************************/
656 static int
657 ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
658 {
659 	struct ixl_vsi		*vsi = arg;
660 	struct ixl_rx_queue	*que = &vsi->rx_queues[ri->iri_qsidx];
661 	struct rx_ring		*rxr = &que->rxr;
662 	union i40e_rx_desc	*cur;
663 	u32		status, error;
664 	u16		plen, vtag;
665 	u64		qword;
666 	u8		ptype;
667 	bool		eop;
668 	int i, cidx;
669 
670 	cidx = ri->iri_cidx;
671 	i = 0;
672 	do {
673 		/* 5 descriptor receive limit */
674 		MPASS(i < IXL_MAX_RX_SEGS);
675 
676 		cur = &rxr->rx_base[cidx];
677 		qword = le64toh(cur->wb.qword1.status_error_len);
678 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
679 		    >> I40E_RXD_QW1_STATUS_SHIFT;
680 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
681 		    >> I40E_RXD_QW1_ERROR_SHIFT;
682 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
683 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
684 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
685 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
686 
687 		/* we should never be called without a valid descriptor */
688 		MPASS((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0);
689 
690 		ri->iri_len += plen;
691 		rxr->rx_bytes += plen;
692 
693 		cur->wb.qword1.status_error_len = 0;
694 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
695 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
696 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
697 		else
698 			vtag = 0;
699 
700 		/*
701 		** Make sure bad packets are discarded,
702 		** note that only EOP descriptor has valid
703 		** error results.
704 		*/
705 		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
706 			rxr->desc_errs++;
707 			return (EBADMSG);
708 		}
709 		ri->iri_frags[i].irf_flid = 0;
710 		ri->iri_frags[i].irf_idx = cidx;
711 		ri->iri_frags[i].irf_len = plen;
712 		if (++cidx == vsi->shared->isc_nrxd[0])
713 			cidx = 0;
714 		i++;
715 	} while (!eop);
716 
717 	/* capture data for dynamic ITR adjustment */
718 	rxr->packets++;
719 	rxr->rx_packets++;
720 
721 	if ((if_getcapenable(vsi->ifp) & IFCAP_RXCSUM) != 0)
722 		ixl_rx_checksum(ri, status, error, ptype);
723 	ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss);
724 	ri->iri_rsstype = ixl_ptype_to_hash(ptype);
725 	ri->iri_vtag = vtag;
726 	ri->iri_nfrags = i;
727 	if (vtag)
728 		ri->iri_flags |= M_VLANTAG;
729 	return (0);
730 }
731 
732 /*********************************************************************
733  *
734  *  Verify that the hardware indicated that the checksum is valid.
735  *  Inform the stack about the status of checksum so that stack
736  *  doesn't spend time verifying the checksum.
737  *
738  *********************************************************************/
739 static void
740 ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
741 {
742 	struct i40e_rx_ptype_decoded decoded;
743 
744 	ri->iri_csum_flags = 0;
745 
746 	/* No L3 or L4 checksum was calculated */
747 	if (!(status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
748 		return;
749 
750 	decoded = decode_rx_desc_ptype(ptype);
751 
752 	/* IPv6 with extension headers likely have bad csum */
753 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
754 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
755 		if (status &
756 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
757 			ri->iri_csum_flags = 0;
758 			return;
759 		}
760 	}
761 
762 	ri->iri_csum_flags |= CSUM_L3_CALC;
763 
764 	/* IPv4 checksum error */
765 	if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
766 		return;
767 
768 	ri->iri_csum_flags |= CSUM_L3_VALID;
769 	ri->iri_csum_flags |= CSUM_L4_CALC;
770 
771 	/* L4 checksum error */
772 	if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
773 		return;
774 
775 	ri->iri_csum_flags |= CSUM_L4_VALID;
776 	ri->iri_csum_data |= htons(0xffff);
777 }
778 
779 /* Set Report Status queue fields to 0 */
780 void
781 ixl_init_tx_rsqs(struct ixl_vsi *vsi)
782 {
783 	if_softc_ctx_t scctx = vsi->shared;
784 	struct ixl_tx_queue *tx_que;
785 	int i, j;
786 
787 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
788 		struct tx_ring *txr = &tx_que->txr;
789 
790 		txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0;
791 
792 		for (j = 0; j < scctx->isc_ntxd[0]; j++)
793 			txr->tx_rsq[j] = QIDX_INVALID;
794 	}
795 }
796 
797 void
798 ixl_init_tx_cidx(struct ixl_vsi *vsi)
799 {
800 	struct ixl_tx_queue *tx_que;
801 	int i;
802 
803 	for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) {
804 		struct tx_ring *txr = &tx_que->txr;
805 
806 		txr->tx_cidx_processed = 0;
807 	}
808 }
809 
810 /*
811  * Input: bitmap of enum virtchnl_link_speed
812  */
813 u64
814 ixl_max_vc_speed_to_value(u8 link_speeds)
815 {
816 	if (link_speeds & VIRTCHNL_LINK_SPEED_40GB)
817 		return IF_Gbps(40);
818 	if (link_speeds & VIRTCHNL_LINK_SPEED_25GB)
819 		return IF_Gbps(25);
820 	if (link_speeds & VIRTCHNL_LINK_SPEED_20GB)
821 		return IF_Gbps(20);
822 	if (link_speeds & VIRTCHNL_LINK_SPEED_10GB)
823 		return IF_Gbps(10);
824 	if (link_speeds & VIRTCHNL_LINK_SPEED_1GB)
825 		return IF_Gbps(1);
826 	if (link_speeds & VIRTCHNL_LINK_SPEED_100MB)
827 		return IF_Mbps(100);
828 	else
829 		/* Minimum supported link speed */
830 		return IF_Mbps(100);
831 }
832 
833 void
834 ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi,
835     struct sysctl_ctx_list *ctx, const char *sysctl_name)
836 {
837 	struct sysctl_oid *tree;
838 	struct sysctl_oid_list *child;
839 	struct sysctl_oid_list *vsi_list;
840 
841 	tree = device_get_sysctl_tree(dev);
842 	child = SYSCTL_CHILDREN(tree);
843 	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
844 				   CTLFLAG_RD, NULL, "VSI Number");
845 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
846 
847 	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
848 }
849 
850 void
851 ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
852 	struct sysctl_oid_list *child,
853 	struct i40e_eth_stats *eth_stats)
854 {
855 	struct ixl_sysctl_info ctls[] =
856 	{
857 		{&eth_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
858 		{&eth_stats->rx_unicast, "ucast_pkts_rcvd",
859 			"Unicast Packets Received"},
860 		{&eth_stats->rx_multicast, "mcast_pkts_rcvd",
861 			"Multicast Packets Received"},
862 		{&eth_stats->rx_broadcast, "bcast_pkts_rcvd",
863 			"Broadcast Packets Received"},
864 		{&eth_stats->rx_discards, "rx_discards", "Discarded RX packets"},
865 		{&eth_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
866 		{&eth_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"},
867 		{&eth_stats->tx_multicast, "mcast_pkts_txd",
868 			"Multicast Packets Transmitted"},
869 		{&eth_stats->tx_broadcast, "bcast_pkts_txd",
870 			"Broadcast Packets Transmitted"},
871 		// end
872 		{0,0,0}
873 	};
874 
875 	struct ixl_sysctl_info *entry = ctls;
876 	while (entry->stat != 0)
877 	{
878 		SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name,
879 				CTLFLAG_RD, entry->stat,
880 				entry->description);
881 		entry++;
882 	}
883 }
884 
885 void
886 ixl_add_queues_sysctls(device_t dev, struct ixl_vsi *vsi)
887 {
888 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
889 	struct sysctl_oid_list *vsi_list, *queue_list;
890 	struct sysctl_oid *queue_node;
891 	char queue_namebuf[32];
892 
893 	struct ixl_rx_queue *rx_que;
894 	struct ixl_tx_queue *tx_que;
895 	struct tx_ring *txr;
896 	struct rx_ring *rxr;
897 
898 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
899 
900 	/* Queue statistics */
901 	for (int q = 0; q < vsi->num_rx_queues; q++) {
902 		bzero(queue_namebuf, sizeof(queue_namebuf));
903 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "rxq%02d", q);
904 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
905 		    OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "RX Queue #");
906 		queue_list = SYSCTL_CHILDREN(queue_node);
907 
908 		rx_que = &(vsi->rx_queues[q]);
909 		rxr = &(rx_que->rxr);
910 
911 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
912 				CTLFLAG_RD, &(rx_que->irqs),
913 				"irqs on this queue (both Tx and Rx)");
914 
915 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
916 				CTLFLAG_RD, &(rxr->rx_packets),
917 				"Queue Packets Received");
918 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
919 				CTLFLAG_RD, &(rxr->rx_bytes),
920 				"Queue Bytes Received");
921 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "desc_err",
922 				CTLFLAG_RD, &(rxr->desc_errs),
923 				"Queue Rx Descriptor Errors");
924 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
925 				CTLFLAG_RD, &(rxr->itr), 0,
926 				"Queue Rx ITR Interval");
927 	}
928 	for (int q = 0; q < vsi->num_tx_queues; q++) {
929 		bzero(queue_namebuf, sizeof(queue_namebuf));
930 		snprintf(queue_namebuf, QUEUE_NAME_LEN, "txq%02d", q);
931 		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
932 		    OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "TX Queue #");
933 		queue_list = SYSCTL_CHILDREN(queue_node);
934 
935 		tx_que = &(vsi->tx_queues[q]);
936 		txr = &(tx_que->txr);
937 
938 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
939 				CTLFLAG_RD, &(tx_que->tso),
940 				"TSO");
941 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
942 				CTLFLAG_RD, &(txr->mss_too_small),
943 				"TSO sends with an MSS less than 64");
944 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets",
945 				CTLFLAG_RD, &(txr->tx_packets),
946 				"Queue Packets Transmitted");
947 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes",
948 				CTLFLAG_RD, &(txr->tx_bytes),
949 				"Queue Bytes Transmitted");
950 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr",
951 				CTLFLAG_RD, &(txr->itr), 0,
952 				"Queue Tx ITR Interval");
953 	}
954 }
955