xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision f39bffc62c1395bde25d152c7f68fdf7cbaab414)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2017, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 ** 	    This was separated to allow usage by
38 ** 	    both the PF and VF drivers.
39 */
40 
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46 
47 #include "ixl.h"
48 
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
53 /* Local Prototypes */
54 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int	ixl_tx_setup_offload(struct ixl_queue *,
58 		    struct mbuf *, u32 *, u32 *);
59 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60 static void	ixl_queue_sw_irq(struct ixl_vsi *, int);
61 
62 static inline void ixl_rx_discard(struct rx_ring *, int);
63 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
64 		    struct mbuf *, u8);
65 
66 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
67 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
68 
69 #ifdef DEV_NETMAP
70 #include <dev/netmap/if_ixl_netmap.h>
71 #if __FreeBSD_version >= 1200000
72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
73 #endif
74 #endif /* DEV_NETMAP */
75 
76 /*
77  * ixl_get_default_rss_key - copy the driver's default RSS key into @key
78  */
79 void
80 ixl_get_default_rss_key(u32 *key)
81 {
82 	MPASS(key != NULL);
83 
84 	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
85 	    0x183cfd8c, 0xce880440, 0x580cbc3c,
86 	    0x35897377, 0x328b25e1, 0x4fa98922,
87 	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
88 	    0x0, 0x0, 0x0};
89 
90 	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
91 }
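/*
 * Illustrative use (a sketch, not taken from this file): a caller provides
 * an IXL_RSS_KEY_SIZE-byte buffer and then programs the key into the RSS
 * hash-key registers, e.g.
 *
 *	u32 rss_key[IXL_RSS_KEY_SIZE_REG];
 *	ixl_get_default_rss_key(rss_key);
 *	for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++)
 *		i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_key[i]);
 *
 * The register names above are the PF ones; the VF path uses its own
 * key registers.
 */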
92 
93 /**
94  * i40e_vc_stat_str - convert virtchnl status err code to a string
95  * @hw: pointer to the HW structure
96  * @stat_err: the status error code to convert
97  **/
98 const char *
99 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
100 {
101 	switch (stat_err) {
102 	case VIRTCHNL_STATUS_SUCCESS:
103 		return "OK";
104 	case VIRTCHNL_STATUS_ERR_PARAM:
105 		return "VIRTCHNL_STATUS_ERR_PARAM";
106 	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
107 		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
108 	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
109 		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
110 	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
111 		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
112 	case VIRTCHNL_STATUS_NOT_SUPPORTED:
113 		return "VIRTCHNL_STATUS_NOT_SUPPORTED";
114 	}
115 
116 	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
117 	return hw->err_str;
118 }
119 
120 /*
121  * PCI BUSMASTER needs to be set for proper operation.
122  */
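/*
 * Without BUSMASTEREN the device cannot DMA descriptors or packet buffers,
 * so the bit is set explicitly here rather than relying on it already
 * being enabled.
 */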
123 void
124 ixl_set_busmaster(device_t dev)
125 {
126 	u16 pci_cmd_word;
127 
128 	pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
129 	pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
130 	pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
131 }
132 
133 /*
134  * Rewrite the ENABLE bit in the MSIX control register
135  */
136 void
137 ixl_set_msix_enable(device_t dev)
138 {
139 	int msix_ctrl, rid;
140 
141 	pci_find_cap(dev, PCIY_MSIX, &rid);
142 	rid += PCIR_MSIX_CTRL;
143 	msix_ctrl = pci_read_config(dev, rid, 2);
144 	msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
145 	pci_write_config(dev, rid, msix_ctrl, 2);
146 }
147 
148 
149 /*
150 ** Multiqueue Transmit driver
151 */
152 int
153 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
154 {
155 	struct ixl_vsi		*vsi = ifp->if_softc;
156 	struct ixl_queue	*que;
157 	struct tx_ring		*txr;
158 	int 			err, i;
159 #ifdef RSS
160 	u32			bucket_id;
161 #endif
162 
163 	/*
164 	 * Which queue to use:
165 	 *
166 	 * When doing RSS, map it to the same outbound
167 	 * queue as the incoming flow would be mapped to.
168 	 * If everything is setup correctly, it should be
169 	 * the same bucket that the current CPU we're on is.
170 	 */
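	/*
	 * Example (illustrative numbers only): with 8 queues, a flow whose
	 * RSS hash maps to bucket 11 is transmitted on queue 11 % 8 = 3,
	 * the same queue its receive traffic is steered to.
	 */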
171 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
172 #ifdef  RSS
173 		if (rss_hash2bucket(m->m_pkthdr.flowid,
174 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
175 			i = bucket_id % vsi->num_queues;
176                 } else
177 #endif
178                         i = m->m_pkthdr.flowid % vsi->num_queues;
179         } else
180 		i = curcpu % vsi->num_queues;
181 
182 	que = &vsi->queues[i];
183 	txr = &que->txr;
184 
185 	err = drbr_enqueue(ifp, txr->br, m);
186 	if (err)
187 		return (err);
188 	if (IXL_TX_TRYLOCK(txr)) {
189 		ixl_mq_start_locked(ifp, txr);
190 		IXL_TX_UNLOCK(txr);
191 	} else
192 		taskqueue_enqueue(que->tq, &que->tx_task);
193 
194 	return (0);
195 }
196 
197 int
198 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
199 {
200 	struct ixl_queue	*que = txr->que;
201 	struct ixl_vsi		*vsi = que->vsi;
202         struct mbuf		*next;
203         int			err = 0;
204 
205 
206 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
207 	    vsi->link_active == 0)
208 		return (ENETDOWN);
209 
210 	/* Process the transmit queue */
211 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
212 		if ((err = ixl_xmit(que, &next)) != 0) {
213 			if (next == NULL)
214 				drbr_advance(ifp, txr->br);
215 			else
216 				drbr_putback(ifp, txr->br, next);
217 			break;
218 		}
219 		drbr_advance(ifp, txr->br);
220 		/* Send a copy of the frame to the BPF listener */
221 		ETHER_BPF_MTAP(ifp, next);
222 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
223 			break;
224 	}
225 
226 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
227 		ixl_txeof(que);
228 
229 	return (err);
230 }
231 
232 /*
233  * Called from a taskqueue to drain queued transmit packets.
234  */
235 void
236 ixl_deferred_mq_start(void *arg, int pending)
237 {
238 	struct ixl_queue	*que = arg;
239         struct tx_ring		*txr = &que->txr;
240 	struct ixl_vsi		*vsi = que->vsi;
241         struct ifnet		*ifp = vsi->ifp;
242 
243 	IXL_TX_LOCK(txr);
244 	if (!drbr_empty(ifp, txr->br))
245 		ixl_mq_start_locked(ifp, txr);
246 	IXL_TX_UNLOCK(txr);
247 }
248 
249 /*
250 ** Flush all queue ring buffers
251 */
252 void
253 ixl_qflush(struct ifnet *ifp)
254 {
255 	struct ixl_vsi	*vsi = ifp->if_softc;
256 
257         for (int i = 0; i < vsi->num_queues; i++) {
258 		struct ixl_queue *que = &vsi->queues[i];
259 		struct tx_ring	*txr = &que->txr;
260 		struct mbuf	*m;
261 		IXL_TX_LOCK(txr);
262 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
263 			m_freem(m);
264 		IXL_TX_UNLOCK(txr);
265 	}
266 	if_qflush(ifp);
267 }
268 
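/*
 * Decide whether a TSO mbuf chain is too "sparse" for the hardware: if any
 * single MSS worth of payload would span more than IXL_SPARSE_CHAIN mbufs,
 * the caller defragments the chain before mapping it for DMA.
 */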
269 static inline bool
270 ixl_tso_detect_sparse(struct mbuf *mp)
271 {
272 	struct mbuf	*m;
273 	int		num, mss;
274 
275 	num = 0;
276 	mss = mp->m_pkthdr.tso_segsz;
277 
278 	/* Exclude first mbuf; assume it contains all headers */
279 	for (m = mp->m_next; m != NULL; m = m->m_next) {
280 		if (m == NULL)
281 			break;
282 		num++;
283 		mss -= m->m_len % mp->m_pkthdr.tso_segsz;
284 
285 		if (num > IXL_SPARSE_CHAIN)
286 			return (true);
287 		if (mss < 1) {
288 			num = (mss == 0) ? 0 : 1;
289 			mss += mp->m_pkthdr.tso_segsz;
290 		}
291 	}
292 
293 	return (false);
294 }
295 
296 
297 /*********************************************************************
298  *
299  *  This routine maps the mbufs to tx descriptors, allowing the
300  *  TX engine to transmit the packets.
301  *  	- return 0 on success, positive on failure
302  *
303  **********************************************************************/
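/*
 * IXL_TXD_CMD is OR'd into the final data descriptor of every frame:
 * EOP marks the end of the packet and RS asks the hardware to report its
 * completion (via descriptor write-back or the head write-back location).
 */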
304 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
305 
306 static int
307 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
308 {
309 	struct ixl_vsi		*vsi = que->vsi;
310 	struct i40e_hw		*hw = vsi->hw;
311 	struct tx_ring		*txr = &que->txr;
312 	struct ixl_tx_buf	*buf;
313 	struct i40e_tx_desc	*txd = NULL;
314 	struct mbuf		*m_head, *m;
315 	int             	i, j, error, nsegs;
316 	int			first, last = 0;
317 	u16			vtag = 0;
318 	u32			cmd, off;
319 	bus_dmamap_t		map;
320 	bus_dma_tag_t		tag;
321 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
322 
323 	cmd = off = 0;
324 	m_head = *m_headp;
325 
326         /*
327          * Important to capture the first descriptor
328          * used because it will contain the index of
329          * the one we tell the hardware to report back
330          */
331         first = txr->next_avail;
332 	buf = &txr->buffers[first];
333 	map = buf->map;
334 	tag = txr->tx_tag;
335 
336 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
337 		/* Use larger mapping for TSO */
338 		tag = txr->tso_tag;
339 		if (ixl_tso_detect_sparse(m_head)) {
340 			m = m_defrag(m_head, M_NOWAIT);
341 			if (m == NULL) {
342 				m_freem(*m_headp);
343 				*m_headp = NULL;
344 				return (ENOBUFS);
345 			}
346 			*m_headp = m;
347 		}
348 	}
349 
350 	/*
351 	 * Map the packet for DMA.
352 	 */
353 	error = bus_dmamap_load_mbuf_sg(tag, map,
354 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
355 
356 	if (error == EFBIG) {
357 		struct mbuf *m;
358 
359 		m = m_defrag(*m_headp, M_NOWAIT);
360 		if (m == NULL) {
361 			que->mbuf_defrag_failed++;
362 			m_freem(*m_headp);
363 			*m_headp = NULL;
364 			return (ENOBUFS);
365 		}
366 		*m_headp = m;
367 
368 		/* Try it again */
369 		error = bus_dmamap_load_mbuf_sg(tag, map,
370 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
371 
372 		if (error != 0) {
373 			que->tx_dmamap_failed++;
374 			m_freem(*m_headp);
375 			*m_headp = NULL;
376 			return (error);
377 		}
378 	} else if (error != 0) {
379 		que->tx_dmamap_failed++;
380 		m_freem(*m_headp);
381 		*m_headp = NULL;
382 		return (error);
383 	}
384 
385 	/* Make certain there are enough descriptors */
386 	if (nsegs > txr->avail - 2) {
387 		txr->no_desc++;
388 		error = ENOBUFS;
389 		goto xmit_fail;
390 	}
391 	m_head = *m_headp;
392 
393 	/* Set up the TSO/CSUM offload */
394 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
395 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
396 		if (error)
397 			goto xmit_fail;
398 	}
399 
400 	cmd |= I40E_TX_DESC_CMD_ICRC;
401 	/* Grab the VLAN tag */
402 	if (m_head->m_flags & M_VLANTAG) {
403 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
404 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
405 	}
406 
407 	i = txr->next_avail;
408 	for (j = 0; j < nsegs; j++) {
409 		bus_size_t seglen;
410 
411 		buf = &txr->buffers[i];
412 		buf->tag = tag; /* Keep track of the type tag */
413 		txd = &txr->base[i];
414 		seglen = segs[j].ds_len;
415 
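		/*
		 * Fill in descriptor quad-word 1: DTYPE in the low bits,
		 * then the command flags, the MAC/IP/L4 header-length
		 * offsets computed by ixl_tx_setup_offload(), the buffer
		 * length and the VLAN tag, each shifted into its field.
		 */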
416 		txd->buffer_addr = htole64(segs[j].ds_addr);
417 		txd->cmd_type_offset_bsz =
418 		    htole64(I40E_TX_DESC_DTYPE_DATA
419 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
420 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
421 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
422 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
423 
424 		last = i; /* descriptor that will get completion IRQ */
425 
426 		if (++i == que->num_tx_desc)
427 			i = 0;
428 
429 		buf->m_head = NULL;
430 		buf->eop_index = -1;
431 	}
432 	/* Set the last descriptor for report */
433 	txd->cmd_type_offset_bsz |=
434 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
435 	txr->avail -= nsegs;
436 	txr->next_avail = i;
437 
438 	buf->m_head = m_head;
439 	/* Swap the dma map between the first and last descriptor.
440 	 * The descriptor that gets checked on completion will now
441 	 * have the real map from the first descriptor.
442 	 */
443 	txr->buffers[first].map = buf->map;
444 	buf->map = map;
445 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
446 
447         /* Set the index of the descriptor that will be marked done */
448         buf = &txr->buffers[first];
449 	buf->eop_index = last;
450 
451         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
452             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
453 	/*
454 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
455 	 * hardware that this frame is available to transmit.
456 	 */
457 	++txr->total_packets;
458 	wr32(hw, txr->tail, i);
459 
460 	/* Mark outstanding work */
461 	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
462 	return (0);
463 
464 xmit_fail:
465 	bus_dmamap_unload(tag, buf->map);
466 	return (error);
467 }
468 
469 
470 /*********************************************************************
471  *
472  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
473  *  the information needed to transmit a packet on the wire. This is
474  *  called only once at attach, setup is done every reset.
475  *
476  **********************************************************************/
477 int
478 ixl_allocate_tx_data(struct ixl_queue *que)
479 {
480 	struct tx_ring		*txr = &que->txr;
481 	struct ixl_vsi		*vsi = que->vsi;
482 	device_t		dev = vsi->dev;
483 	struct ixl_tx_buf	*buf;
484 	int			i, error = 0;
485 
486 	/*
487 	 * Setup DMA descriptor areas.
488 	 */
489 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),		/* parent */
490 			       1, 0,			/* alignment, bounds */
491 			       BUS_SPACE_MAXADDR,	/* lowaddr */
492 			       BUS_SPACE_MAXADDR,	/* highaddr */
493 			       NULL, NULL,		/* filter, filterarg */
494 			       IXL_TSO_SIZE,		/* maxsize */
495 			       IXL_MAX_TX_SEGS,		/* nsegments */
496 			       IXL_MAX_DMA_SEG_SIZE,	/* maxsegsize */
497 			       0,			/* flags */
498 			       NULL,			/* lockfunc */
499 			       NULL,			/* lockfuncarg */
500 			       &txr->tx_tag))) {
501 		device_printf(dev,"Unable to allocate TX DMA tag\n");
502 		return (error);
503 	}
504 
505 	/* Make a special tag for TSO */
506 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),		/* parent */
507 			       1, 0,			/* alignment, bounds */
508 			       BUS_SPACE_MAXADDR,	/* lowaddr */
509 			       BUS_SPACE_MAXADDR,	/* highaddr */
510 			       NULL, NULL,		/* filter, filterarg */
511 			       IXL_TSO_SIZE,		/* maxsize */
512 			       IXL_MAX_TSO_SEGS,	/* nsegments */
513 			       IXL_MAX_DMA_SEG_SIZE,	/* maxsegsize */
514 			       0,			/* flags */
515 			       NULL,			/* lockfunc */
516 			       NULL,			/* lockfuncarg */
517 			       &txr->tso_tag))) {
518 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
519 		goto free_tx_dma;
520 	}
521 
522 	if (!(txr->buffers =
523 	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
524 	    que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
525 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
526 		error = ENOMEM;
527 		goto free_tx_tso_dma;
528 	}
529 
530         /* Create the descriptor buffer default dma maps */
531 	buf = txr->buffers;
532 	for (i = 0; i < que->num_tx_desc; i++, buf++) {
533 		buf->tag = txr->tx_tag;
534 		error = bus_dmamap_create(buf->tag, 0, &buf->map);
535 		if (error != 0) {
536 			device_printf(dev, "Unable to create TX DMA map\n");
537 			goto free_buffers;
538 		}
539 	}
540 
541 	return 0;
542 
543 free_buffers:
544 	while (i--) {
545 		buf--;
546 		bus_dmamap_destroy(buf->tag, buf->map);
547 	}
548 
549 	free(txr->buffers, M_DEVBUF);
550 	txr->buffers = NULL;
551 free_tx_tso_dma:
552 	bus_dma_tag_destroy(txr->tso_tag);
553 	txr->tso_tag = NULL;
554 free_tx_dma:
555 	bus_dma_tag_destroy(txr->tx_tag);
556 	txr->tx_tag = NULL;
557 
558 	return (error);
559 }
560 
561 
562 /*********************************************************************
563  *
564  *  (Re)Initialize a queue transmit ring.
565  *	- called by init, it clears the descriptor ring,
566  *	  and frees any stale mbufs
567  *
568  **********************************************************************/
569 void
570 ixl_init_tx_ring(struct ixl_queue *que)
571 {
572 #ifdef DEV_NETMAP
573 	struct netmap_adapter *na = NA(que->vsi->ifp);
574 	struct netmap_slot *slot;
575 #endif /* DEV_NETMAP */
576 	struct tx_ring		*txr = &que->txr;
577 	struct ixl_tx_buf	*buf;
578 
579 	/* Clear the old ring contents */
580 	IXL_TX_LOCK(txr);
581 
582 #ifdef DEV_NETMAP
583 	/*
584 	 * (under lock): if in netmap mode, do some consistency
585 	 * checks and set slot to entry 0 of the netmap ring.
586 	 */
587 	slot = netmap_reset(na, NR_TX, que->me, 0);
588 #endif /* DEV_NETMAP */
589 
590 	bzero((void *)txr->base,
591 	      (sizeof(struct i40e_tx_desc)) * que->num_tx_desc);
592 
593 	/* Reset indices */
594 	txr->next_avail = 0;
595 	txr->next_to_clean = 0;
596 
597 	/* Reset watchdog status */
598 	txr->watchdog_timer = 0;
599 
600 	/* Free any existing tx mbufs. */
601         buf = txr->buffers;
602 	for (int i = 0; i < que->num_tx_desc; i++, buf++) {
603 		if (buf->m_head != NULL) {
604 			bus_dmamap_sync(buf->tag, buf->map,
605 			    BUS_DMASYNC_POSTWRITE);
606 			bus_dmamap_unload(buf->tag, buf->map);
607 			m_freem(buf->m_head);
608 			buf->m_head = NULL;
609 		}
610 #ifdef DEV_NETMAP
611 		/*
612 		 * In netmap mode, set the map for the packet buffer.
613 		 * NOTE: Some drivers (not this one) also need to set
614 		 * the physical buffer address in the NIC ring.
615 		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
616 		 * netmap slot index, si
617 		 */
618 		if (slot) {
619 			int si = netmap_idx_n2k(na->tx_rings[que->me], i);
620 			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
621 		}
622 #endif /* DEV_NETMAP */
623 		/* Clear the EOP index */
624 		buf->eop_index = -1;
625         }
626 
627 	/* Set number of descriptors available */
628 	txr->avail = que->num_tx_desc;
629 
630 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
631 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
632 	IXL_TX_UNLOCK(txr);
633 }
634 
635 
636 /*********************************************************************
637  *
638  *  Free transmit ring related data structures.
639  *
640  **********************************************************************/
641 void
642 ixl_free_que_tx(struct ixl_queue *que)
643 {
644 	struct tx_ring *txr = &que->txr;
645 	struct ixl_tx_buf *buf;
646 
647 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
648 
649 	for (int i = 0; i < que->num_tx_desc; i++) {
650 		buf = &txr->buffers[i];
651 		if (buf->m_head != NULL) {
652 			bus_dmamap_sync(buf->tag, buf->map,
653 			    BUS_DMASYNC_POSTWRITE);
654 			m_freem(buf->m_head);
655 			buf->m_head = NULL;
656 			}
657 		bus_dmamap_unload(buf->tag, buf->map);
658 		bus_dmamap_destroy(buf->tag, buf->map);
659 	}
660 	if (txr->buffers != NULL) {
661 		free(txr->buffers, M_DEVBUF);
662 		txr->buffers = NULL;
663 	}
664 	if (txr->tx_tag != NULL) {
665 		bus_dma_tag_destroy(txr->tx_tag);
666 		txr->tx_tag = NULL;
667 	}
668 	if (txr->tso_tag != NULL) {
669 		bus_dma_tag_destroy(txr->tso_tag);
670 		txr->tso_tag = NULL;
671 	}
672 
673 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
674 	return;
675 }
676 
677 /*********************************************************************
678  *
679  *  Setup descriptor for hw offloads
680  *
681  **********************************************************************/
682 
683 static int
684 ixl_tx_setup_offload(struct ixl_queue *que,
685     struct mbuf *mp, u32 *cmd, u32 *off)
686 {
687 	struct ether_vlan_header	*eh;
688 #ifdef INET
689 	struct ip			*ip = NULL;
690 #endif
691 	struct tcphdr			*th = NULL;
692 #ifdef INET6
693 	struct ip6_hdr			*ip6;
694 #endif
695 	int				elen, ip_hlen = 0, tcp_hlen;
696 	u16				etype;
697 	u8				ipproto = 0;
698 	bool				tso = FALSE;
699 
700 	/* Set up the TSO context descriptor if required */
701 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
702 		tso = ixl_tso_setup(que, mp);
703 		if (tso)
704 			++que->tso;
705 		else
706 			return (ENXIO);
707 	}
708 
709 	/*
710 	 * Determine where frame payload starts.
711 	 * Jump over vlan headers if already present,
712 	 * helpful for QinQ too.
713 	 */
714 	eh = mtod(mp, struct ether_vlan_header *);
715 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
716 		etype = ntohs(eh->evl_proto);
717 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
718 	} else {
719 		etype = ntohs(eh->evl_encap_proto);
720 		elen = ETHER_HDR_LEN;
721 	}
722 
723 	switch (etype) {
724 #ifdef INET
725 		case ETHERTYPE_IP:
726 			ip = (struct ip *)(mp->m_data + elen);
727 			ip_hlen = ip->ip_hl << 2;
728 			ipproto = ip->ip_p;
729 			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
730 			/* The IP checksum must be recalculated with TSO */
731 			if (tso)
732 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
733 			else
734 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
735 			break;
736 #endif
737 #ifdef INET6
738 		case ETHERTYPE_IPV6:
739 			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
740 			ip_hlen = sizeof(struct ip6_hdr);
741 			ipproto = ip6->ip6_nxt;
742 			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
743 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
744 			break;
745 #endif
746 		default:
747 			break;
748 	}
749 
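	/*
	 * The offload header lengths are written in hardware units:
	 * MACLEN in 2-byte words, IPLEN and L4LEN in 4-byte words,
	 * hence the >> 1 and >> 2 shifts below.
	 */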
750 	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
751 	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
752 
753 	switch (ipproto) {
754 		case IPPROTO_TCP:
755 			tcp_hlen = th->th_off << 2;
756 			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
757 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
758 				*off |= (tcp_hlen >> 2) <<
759 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
760 			}
761 			break;
762 		case IPPROTO_UDP:
763 			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
764 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
765 				*off |= (sizeof(struct udphdr) >> 2) <<
766 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
767 			}
768 			break;
769 		case IPPROTO_SCTP:
770 			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
771 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
772 				*off |= (sizeof(struct sctphdr) >> 2) <<
773 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
774 			}
775 			/* Fall Thru */
776 		default:
777 			break;
778 	}
779 
780         return (0);
781 }
782 
783 
784 /**********************************************************************
785  *
786  *  Setup context for hardware segmentation offload (TSO)
787  *
788  **********************************************************************/
789 static bool
790 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
791 {
792 	struct tx_ring			*txr = &que->txr;
793 	struct i40e_tx_context_desc	*TXD;
794 	struct ixl_tx_buf		*buf;
795 	u32				cmd, mss, type, tsolen;
796 	u16				etype;
797 	int				idx, elen, ip_hlen, tcp_hlen;
798 	struct ether_vlan_header	*eh;
799 #ifdef INET
800 	struct ip			*ip;
801 #endif
802 #ifdef INET6
803 	struct ip6_hdr			*ip6;
804 #endif
805 #if defined(INET6) || defined(INET)
806 	struct tcphdr			*th;
807 #endif
808 	u64				type_cmd_tso_mss;
809 
810 	/*
811 	 * Determine where frame payload starts.
812 	 * Jump over vlan headers if already present
813 	 */
814 	eh = mtod(mp, struct ether_vlan_header *);
815 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
816 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
817 		etype = eh->evl_proto;
818 	} else {
819 		elen = ETHER_HDR_LEN;
820 		etype = eh->evl_encap_proto;
821 	}
822 
823         switch (ntohs(etype)) {
824 #ifdef INET6
825 	case ETHERTYPE_IPV6:
826 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
827 		if (ip6->ip6_nxt != IPPROTO_TCP)
828 			return (FALSE);
829 		ip_hlen = sizeof(struct ip6_hdr);
830 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
831 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
832 		tcp_hlen = th->th_off << 2;
833 		/*
834 		 * The corresponding flag is set by the stack in the IPv4
835 		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
836 		 * So, set it here because the rest of the flow requires it.
837 		 */
838 		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
839 		break;
840 #endif
841 #ifdef INET
842 	case ETHERTYPE_IP:
843 		ip = (struct ip *)(mp->m_data + elen);
844 		if (ip->ip_p != IPPROTO_TCP)
845 			return (FALSE);
846 		ip->ip_sum = 0;
847 		ip_hlen = ip->ip_hl << 2;
848 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
849 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
850 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
851 		tcp_hlen = th->th_off << 2;
852 		break;
853 #endif
854 	default:
855 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
856 		    __func__, ntohs(etype));
857 		return FALSE;
858         }
859 
860         /* Ensure we have at least the IP+TCP header in the first mbuf. */
861         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
862 		return FALSE;
863 
864 	idx = txr->next_avail;
865 	buf = &txr->buffers[idx];
866 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
867 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
868 
869 	type = I40E_TX_DESC_DTYPE_CONTEXT;
870 	cmd = I40E_TX_CTX_DESC_TSO;
871 	/* TSO MSS must not be less than 64 */
872 	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
873 		que->mss_too_small++;
874 		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
875 	}
876 	mss = mp->m_pkthdr.tso_segsz;
877 
878 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
879 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
880 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
881 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
882 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
883 
884 	TXD->tunneling_params = htole32(0);
885 	buf->m_head = NULL;
886 	buf->eop_index = -1;
887 
888 	if (++idx == que->num_tx_desc)
889 		idx = 0;
890 
891 	txr->avail--;
892 	txr->next_avail = idx;
893 
894 	return TRUE;
895 }
896 
897 /*
898  * ixl_get_tx_head - Retrieve the value from the
899  *    location the HW records its HEAD index
900  */
901 static inline u32
902 ixl_get_tx_head(struct ixl_queue *que)
903 {
904 	struct tx_ring  *txr = &que->txr;
905 	void *head = &txr->base[que->num_tx_desc];
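	/*
	 * With head write-back enabled, the hardware DMAs its current head
	 * index into the word just past the last descriptor; the ring
	 * allocation reserves that extra space for this value.
	 */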
906 	return LE32_TO_CPU(*(volatile __le32 *)head);
907 }
908 
909 /**********************************************************************
910  *
911  * Get index of last used descriptor/buffer from hardware, and clean
912  * the descriptors/buffers up to that index.
913  *
914  **********************************************************************/
915 static bool
916 ixl_txeof_hwb(struct ixl_queue *que)
917 {
918 	struct tx_ring		*txr = &que->txr;
919 	u32			first, last, head, done;
920 	struct ixl_tx_buf	*buf;
921 	struct i40e_tx_desc	*tx_desc, *eop_desc;
922 
923 	mtx_assert(&txr->mtx, MA_OWNED);
924 
925 #ifdef DEV_NETMAP
926 	// XXX todo: implement moderation
927 	if (netmap_tx_irq(que->vsi->ifp, que->me))
928 		return FALSE;
929 #endif /* DEV_NETMAP */
930 
931 	/* These are not the descriptors you seek, move along :) */
932 	if (txr->avail == que->num_tx_desc) {
933 		atomic_store_rel_32(&txr->watchdog_timer, 0);
934 		return FALSE;
935 	}
936 
937 	first = txr->next_to_clean;
938 	buf = &txr->buffers[first];
939 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
940 	last = buf->eop_index;
941 	if (last == -1)
942 		return FALSE;
943 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
944 
945 	/* Sync DMA before reading head index from ring */
946         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
947             BUS_DMASYNC_POSTREAD);
948 
949 	/* Get the Head WB value */
950 	head = ixl_get_tx_head(que);
951 
952 	/*
953 	** Get the index of the first descriptor
954 	** BEYOND the EOP and call that 'done'.
955 	** I do this so the comparison in the
956 	** inner while loop below can be simple
957 	*/
958 	if (++last == que->num_tx_desc) last = 0;
959 	done = last;
960 
961 	/*
962 	** The HEAD index of the ring is written in a
963 	** defined location, this rather than a done bit
964 	** is what is used to keep track of what must be
965 	** 'cleaned'.
966 	*/
967 	while (first != head) {
968 		/* We clean the range of the packet */
969 		while (first != done) {
970 			++txr->avail;
971 
972 			if (buf->m_head) {
973 				txr->bytes += /* for ITR adjustment */
974 				    buf->m_head->m_pkthdr.len;
975 				txr->tx_bytes += /* for TX stats */
976 				    buf->m_head->m_pkthdr.len;
977 				bus_dmamap_sync(buf->tag,
978 				    buf->map,
979 				    BUS_DMASYNC_POSTWRITE);
980 				bus_dmamap_unload(buf->tag,
981 				    buf->map);
982 				m_freem(buf->m_head);
983 				buf->m_head = NULL;
984 			}
985 			buf->eop_index = -1;
986 
987 			if (++first == que->num_tx_desc)
988 				first = 0;
989 
990 			buf = &txr->buffers[first];
991 			tx_desc = &txr->base[first];
992 		}
993 		++txr->packets;
994 		/* If a packet was successfully cleaned, reset the watchdog timer */
995 		atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
996 		/* See if there is more work now */
997 		last = buf->eop_index;
998 		if (last != -1) {
999 			eop_desc = &txr->base[last];
1000 			/* Get next done point */
1001 			if (++last == que->num_tx_desc) last = 0;
1002 			done = last;
1003 		} else
1004 			break;
1005 	}
1006 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1007 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1008 
1009 	txr->next_to_clean = first;
1010 
1011 	/*
1012 	 * If there are no pending descriptors, clear the timeout.
1013 	 */
1014 	if (txr->avail == que->num_tx_desc) {
1015 		atomic_store_rel_32(&txr->watchdog_timer, 0);
1016 		return FALSE;
1017 	}
1018 
1019 	return TRUE;
1020 }
1021 
1022 /**********************************************************************
1023  *
1024  * Use index kept by driver and the flag on each descriptor to find used
1025  * descriptor/buffers and clean them up for re-use.
1026  *
1027  * This method of reclaiming descriptors is currently incompatible with
1028  * DEV_NETMAP.
1029  *
1030  * Returns TRUE if there are more descriptors to be cleaned after this
1031  * function exits.
1032  *
1033  **********************************************************************/
1034 static bool
1035 ixl_txeof_dwb(struct ixl_queue *que)
1036 {
1037 	struct tx_ring		*txr = &que->txr;
1038 	u32			first, last, done;
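	/* Bound the number of packets cleaned in one call */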
1039 	u32			limit = 256;
1040 	struct ixl_tx_buf	*buf;
1041 	struct i40e_tx_desc	*tx_desc, *eop_desc;
1042 
1043 	mtx_assert(&txr->mtx, MA_OWNED);
1044 
1045 	/* There are no descriptors to clean */
1046 	if (txr->avail == que->num_tx_desc) {
1047 		atomic_store_rel_32(&txr->watchdog_timer, 0);
1048 		return FALSE;
1049 	}
1050 
1051 	/* Set starting index/descriptor/buffer */
1052 	first = txr->next_to_clean;
1053 	buf = &txr->buffers[first];
1054 	tx_desc = &txr->base[first];
1055 
1056 	/*
1057 	 * This function operates per-packet -- identifies the start of the
1058 	 * packet and gets the index of the last descriptor of the packet from
1059 	 * it, from eop_index.
1060 	 *
1061 	 * If the last descriptor is marked "done" by the hardware, then all
1062 	 * of the descriptors for the packet are cleaned.
1063 	 */
1064 	last = buf->eop_index;
1065 	if (last == -1)
1066 		return FALSE;
1067 	eop_desc = &txr->base[last];
1068 
1069 	/* Sync DMA before reading from ring */
1070         bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD);
1071 
1072 	/*
1073 	 * Get the index of the first descriptor beyond the EOP and call that
1074 	 * 'done'. Simplifies the comparison for the inner loop below.
1075 	 */
1076 	if (++last == que->num_tx_desc)
1077 		last = 0;
1078 	done = last;
1079 
1080 	/*
1081 	 * We find the last completed descriptor by examining each
1082 	 * descriptor's status bits to see if it's done.
1083 	 */
1084 	do {
1085 		/* Break if last descriptor in packet isn't marked done */
1086 		if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK)
1087 		    != I40E_TX_DESC_DTYPE_DESC_DONE)
1088 			break;
1089 
1090 		/* Clean the descriptors that make up the processed packet */
1091 		while (first != done) {
1092 			/*
1093 			 * If there was a buffer attached to this descriptor,
1094 			 * prevent the adapter from accessing it, and add its
1095 			 * length to the queue's TX stats.
1096 			 */
1097 			if (buf->m_head) {
1098 				txr->bytes += buf->m_head->m_pkthdr.len;
1099 				txr->tx_bytes += buf->m_head->m_pkthdr.len;
1100 				bus_dmamap_sync(buf->tag, buf->map,
1101 				    BUS_DMASYNC_POSTWRITE);
1102 				bus_dmamap_unload(buf->tag, buf->map);
1103 				m_freem(buf->m_head);
1104 				buf->m_head = NULL;
1105 			}
1106 			buf->eop_index = -1;
1107 			++txr->avail;
1108 
1109 			if (++first == que->num_tx_desc)
1110 				first = 0;
1111 			buf = &txr->buffers[first];
1112 			tx_desc = &txr->base[first];
1113 		}
1114 		++txr->packets;
1115 		/* If a packet was successfully cleaned, reset the watchdog timer */
1116 		atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
1117 
1118 		/*
1119 		 * Since buf is the first buffer after the one that was just
1120 		 * cleaned, check if the packet it starts is done, too.
1121 		 */
1122 		last = buf->eop_index;
1123 		if (last != -1) {
1124 			eop_desc = &txr->base[last];
1125 			/* Get next done point */
1126 			if (++last == que->num_tx_desc) last = 0;
1127 			done = last;
1128 		} else
1129 			break;
1130 	} while (--limit);
1131 
1132 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1133 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1134 
1135 	txr->next_to_clean = first;
1136 
1137 	/*
1138 	 * If there are no pending descriptors, clear the watchdog timer.
1139 	 */
1140 	if (txr->avail == que->num_tx_desc) {
1141 		atomic_store_rel_32(&txr->watchdog_timer, 0);
1142 		return FALSE;
1143 	}
1144 
1145 	return TRUE;
1146 }
1147 
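/*
 * Clean completed TX descriptors for a queue, dispatching to the head
 * write-back method (hardware publishes its head index in memory) or the
 * descriptor write-back method (hardware marks each packet's last
 * descriptor done), depending on how the VSI is configured.
 */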
1148 bool
1149 ixl_txeof(struct ixl_queue *que)
1150 {
1151 	struct ixl_vsi *vsi = que->vsi;
1152 
1153 	return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que)
1154 	    : ixl_txeof_dwb(que);
1155 }
1156 
1157 
1158 /*********************************************************************
1159  *
1160  *  Refresh mbuf buffers for RX descriptor rings
1161  *   - now keeps its own state so discards due to resource
1162  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1163  *     it just returns, keeping its placeholder, thus it can simply
1164  *     be recalled to try again.
1165  *
1166  **********************************************************************/
1167 static void
1168 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
1169 {
1170 	struct ixl_vsi		*vsi = que->vsi;
1171 	struct rx_ring		*rxr = &que->rxr;
1172 	bus_dma_segment_t	hseg[1];
1173 	bus_dma_segment_t	pseg[1];
1174 	struct ixl_rx_buf	*buf;
1175 	struct mbuf		*mh, *mp;
1176 	int			i, j, nsegs, error;
1177 	bool			refreshed = FALSE;
1178 
1179 	i = j = rxr->next_refresh;
1180 	/* Control the loop with one beyond */
1181 	if (++j == que->num_rx_desc)
1182 		j = 0;
1183 
1184 	while (j != limit) {
1185 		buf = &rxr->buffers[i];
1186 		if (rxr->hdr_split == FALSE)
1187 			goto no_split;
1188 
1189 		if (buf->m_head == NULL) {
1190 			mh = m_gethdr(M_NOWAIT, MT_DATA);
1191 			if (mh == NULL)
1192 				goto update;
1193 		} else
1194 			mh = buf->m_head;
1195 
1196 		mh->m_pkthdr.len = mh->m_len = MHLEN;
1197 		mh->m_len = MHLEN;
1198 		mh->m_flags |= M_PKTHDR;
1199 		/* Get the memory mapping */
1200 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1201 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1202 		if (error != 0) {
1203 			printf("Refresh mbufs: hdr dmamap load"
1204 			    " failure - %d\n", error);
1205 			m_free(mh);
1206 			buf->m_head = NULL;
1207 			goto update;
1208 		}
1209 		buf->m_head = mh;
1210 		bus_dmamap_sync(rxr->htag, buf->hmap,
1211 		    BUS_DMASYNC_PREREAD);
1212 		rxr->base[i].read.hdr_addr =
1213 		   htole64(hseg[0].ds_addr);
1214 
1215 no_split:
1216 		if (buf->m_pack == NULL) {
1217 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1218 			    M_PKTHDR, rxr->mbuf_sz);
1219 			if (mp == NULL)
1220 				goto update;
1221 		} else
1222 			mp = buf->m_pack;
1223 
1224 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1225 		/* Get the memory mapping */
1226 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1227 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1228 		if (error != 0) {
1229 			printf("Refresh mbufs: payload dmamap load"
1230 			    " failure - %d\n", error);
1231 			m_free(mp);
1232 			buf->m_pack = NULL;
1233 			goto update;
1234 		}
1235 		buf->m_pack = mp;
1236 		bus_dmamap_sync(rxr->ptag, buf->pmap,
1237 		    BUS_DMASYNC_PREREAD);
1238 		rxr->base[i].read.pkt_addr =
1239 		   htole64(pseg[0].ds_addr);
1240 		/* Used only when doing header split */
1241 		rxr->base[i].read.hdr_addr = 0;
1242 
1243 		refreshed = TRUE;
1244 		/* Next is precalculated */
1245 		i = j;
1246 		rxr->next_refresh = i;
1247 		if (++j == que->num_rx_desc)
1248 			j = 0;
1249 	}
1250 update:
1251 	if (refreshed) /* Update hardware tail index */
1252 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1253 	return;
1254 }
1255 
1256 
1257 /*********************************************************************
1258  *
1259  *  Allocate memory for rx_buffer structures. Since we use one
1260  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1261  *  that we'll need is equal to the number of receive descriptors
1262  *  that we've defined.
1263  *
1264  **********************************************************************/
1265 int
1266 ixl_allocate_rx_data(struct ixl_queue *que)
1267 {
1268 	struct rx_ring		*rxr = &que->rxr;
1269 	struct ixl_vsi		*vsi = que->vsi;
1270 	device_t 		dev = vsi->dev;
1271 	struct ixl_rx_buf 	*buf;
1272 	int             	i, bsize, error;
1273 
1274 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1275 				   1, 0,	/* alignment, bounds */
1276 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1277 				   BUS_SPACE_MAXADDR,	/* highaddr */
1278 				   NULL, NULL,		/* filter, filterarg */
1279 				   MSIZE,		/* maxsize */
1280 				   1,			/* nsegments */
1281 				   MSIZE,		/* maxsegsize */
1282 				   0,			/* flags */
1283 				   NULL,		/* lockfunc */
1284 				   NULL,		/* lockfuncarg */
1285 				   &rxr->htag))) {
1286 		device_printf(dev, "Unable to create RX DMA htag\n");
1287 		return (error);
1288 	}
1289 
1290 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1291 				   1, 0,	/* alignment, bounds */
1292 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1293 				   BUS_SPACE_MAXADDR,	/* highaddr */
1294 				   NULL, NULL,		/* filter, filterarg */
1295 				   MJUM16BYTES,		/* maxsize */
1296 				   1,			/* nsegments */
1297 				   MJUM16BYTES,		/* maxsegsize */
1298 				   0,			/* flags */
1299 				   NULL,		/* lockfunc */
1300 				   NULL,		/* lockfuncarg */
1301 				   &rxr->ptag))) {
1302 		device_printf(dev, "Unable to create RX DMA ptag\n");
1303 		goto free_rx_htag;
1304 	}
1305 
1306 	bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc;
1307 	if (!(rxr->buffers =
1308 	    (struct ixl_rx_buf *) malloc(bsize,
1309 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1310 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1311 		error = ENOMEM;
1312 		goto free_rx_ptag;
1313 	}
1314 
1315 	for (i = 0; i < que->num_rx_desc; i++) {
1316 		buf = &rxr->buffers[i];
1317 		error = bus_dmamap_create(rxr->htag,
1318 		    BUS_DMA_NOWAIT, &buf->hmap);
1319 		if (error) {
1320 			device_printf(dev, "Unable to create RX head map\n");
1321 			goto free_buffers;
1322 		}
1323 		error = bus_dmamap_create(rxr->ptag,
1324 		    BUS_DMA_NOWAIT, &buf->pmap);
1325 		if (error) {
1326 			bus_dmamap_destroy(rxr->htag, buf->hmap);
1327 			device_printf(dev, "Unable to create RX pkt map\n");
1328 			goto free_buffers;
1329 		}
1330 	}
1331 
1332 	return 0;
1333 free_buffers:
1334 	while (i--) {
1335 		buf = &rxr->buffers[i];
1336 		bus_dmamap_destroy(rxr->ptag, buf->pmap);
1337 		bus_dmamap_destroy(rxr->htag, buf->hmap);
1338 	}
1339 	free(rxr->buffers, M_DEVBUF);
1340 	rxr->buffers = NULL;
1341 free_rx_ptag:
1342 	bus_dma_tag_destroy(rxr->ptag);
1343 	rxr->ptag = NULL;
1344 free_rx_htag:
1345 	bus_dma_tag_destroy(rxr->htag);
1346 	rxr->htag = NULL;
1347 	return (error);
1348 }
1349 
1350 
1351 /*********************************************************************
1352  *
1353  *  (Re)Initialize the queue receive ring and its buffers.
1354  *
1355  **********************************************************************/
1356 int
1357 ixl_init_rx_ring(struct ixl_queue *que)
1358 {
1359 	struct	rx_ring 	*rxr = &que->rxr;
1360 	struct ixl_vsi		*vsi = que->vsi;
1361 #if defined(INET6) || defined(INET)
1362 	struct ifnet		*ifp = vsi->ifp;
1363 	struct lro_ctrl		*lro = &rxr->lro;
1364 #endif
1365 	struct ixl_rx_buf	*buf;
1366 	bus_dma_segment_t	pseg[1], hseg[1];
1367 	int			rsize, nsegs, error = 0;
1368 #ifdef DEV_NETMAP
1369 	struct netmap_adapter *na = NA(que->vsi->ifp);
1370 	struct netmap_slot *slot;
1371 #endif /* DEV_NETMAP */
1372 
1373 	IXL_RX_LOCK(rxr);
1374 #ifdef DEV_NETMAP
1375 	/* same as in ixl_init_tx_ring() */
1376 	slot = netmap_reset(na, NR_RX, que->me, 0);
1377 #endif /* DEV_NETMAP */
1378 	/* Clear the ring contents */
1379 	rsize = roundup2(que->num_rx_desc *
1380 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1381 	bzero((void *)rxr->base, rsize);
1382 	/* Cleanup any existing buffers */
1383 	for (int i = 0; i < que->num_rx_desc; i++) {
1384 		buf = &rxr->buffers[i];
1385 		if (buf->m_head != NULL) {
1386 			bus_dmamap_sync(rxr->htag, buf->hmap,
1387 			    BUS_DMASYNC_POSTREAD);
1388 			bus_dmamap_unload(rxr->htag, buf->hmap);
1389 			buf->m_head->m_flags |= M_PKTHDR;
1390 			m_freem(buf->m_head);
1391 		}
1392 		if (buf->m_pack != NULL) {
1393 			bus_dmamap_sync(rxr->ptag, buf->pmap,
1394 			    BUS_DMASYNC_POSTREAD);
1395 			bus_dmamap_unload(rxr->ptag, buf->pmap);
1396 			buf->m_pack->m_flags |= M_PKTHDR;
1397 			m_freem(buf->m_pack);
1398 		}
1399 		buf->m_head = NULL;
1400 		buf->m_pack = NULL;
1401 	}
1402 
1403 	/* header split is off */
1404 	rxr->hdr_split = FALSE;
1405 
1406 	/* Now replenish the mbufs */
1407 	for (int j = 0; j != que->num_rx_desc; ++j) {
1408 		struct mbuf	*mh, *mp;
1409 
1410 		buf = &rxr->buffers[j];
1411 #ifdef DEV_NETMAP
1412 		/*
1413 		 * In netmap mode, fill the map and set the buffer
1414 		 * address in the NIC ring, considering the offset
1415 		 * between the netmap and NIC rings (see comment in
1416 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1417 		 * an mbuf, so end the block with a continue;
1418 		 */
1419 		if (slot) {
1420 			int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
1421 			uint64_t paddr;
1422 			void *addr;
1423 
1424 			addr = PNMB(na, slot + sj, &paddr);
1425 			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1426 			/* Update descriptor and the cached value */
1427 			rxr->base[j].read.pkt_addr = htole64(paddr);
1428 			rxr->base[j].read.hdr_addr = 0;
1429 			continue;
1430 		}
1431 #endif /* DEV_NETMAP */
1432 		/*
1433 		** Don't allocate mbufs if not
1434 		** doing header split, it's wasteful
1435 		*/
1436 		if (rxr->hdr_split == FALSE)
1437 			goto skip_head;
1438 
1439 		/* First the header */
1440 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1441 		if (buf->m_head == NULL) {
1442 			error = ENOBUFS;
1443 			goto fail;
1444 		}
1445 		m_adj(buf->m_head, ETHER_ALIGN);
1446 		mh = buf->m_head;
1447 		mh->m_len = mh->m_pkthdr.len = MHLEN;
1448 		mh->m_flags |= M_PKTHDR;
1449 		/* Get the memory mapping */
1450 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1451 		    buf->hmap, buf->m_head, hseg,
1452 		    &nsegs, BUS_DMA_NOWAIT);
1453 		if (error != 0) /* Nothing elegant to do here */
1454 			goto fail;
1455 		bus_dmamap_sync(rxr->htag,
1456 		    buf->hmap, BUS_DMASYNC_PREREAD);
1457 		/* Update descriptor */
1458 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1459 
1460 skip_head:
1461 		/* Now the payload cluster */
1462 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1463 		    M_PKTHDR, rxr->mbuf_sz);
1464 		if (buf->m_pack == NULL) {
1465 			error = ENOBUFS;
1466                         goto fail;
1467 		}
1468 		mp = buf->m_pack;
1469 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1470 		/* Get the memory mapping */
1471 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1472 		    buf->pmap, mp, pseg,
1473 		    &nsegs, BUS_DMA_NOWAIT);
1474 		if (error != 0)
1475                         goto fail;
1476 		bus_dmamap_sync(rxr->ptag,
1477 		    buf->pmap, BUS_DMASYNC_PREREAD);
1478 		/* Update descriptor */
1479 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1480 		rxr->base[j].read.hdr_addr = 0;
1481 	}
1482 
1483 
1484 	/* Setup our descriptor indices */
1485 	rxr->next_check = 0;
1486 	rxr->next_refresh = 0;
1487 	rxr->lro_enabled = FALSE;
1488 	rxr->split = 0;
1489 	rxr->bytes = 0;
1490 	rxr->discard = FALSE;
1491 
1492 	wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1);
1493 	ixl_flush(vsi->hw);
1494 
1495 #if defined(INET6) || defined(INET)
1496 	/*
1497 	** Now set up the LRO interface:
1498 	*/
1499 	if (ifp->if_capenable & IFCAP_LRO) {
1500 		int err = tcp_lro_init(lro);
1501 		if (err) {
1502 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1503 			goto fail;
1504 		}
1505 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1506 		rxr->lro_enabled = TRUE;
1507 		lro->ifp = vsi->ifp;
1508 	}
1509 #endif
1510 
1511 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1512 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1513 
1514 fail:
1515 	IXL_RX_UNLOCK(rxr);
1516 	return (error);
1517 }
1518 
1519 
1520 /*********************************************************************
1521  *
1522  *  Free station receive ring data structures
1523  *
1524  **********************************************************************/
1525 void
1526 ixl_free_que_rx(struct ixl_queue *que)
1527 {
1528 	struct rx_ring		*rxr = &que->rxr;
1529 	struct ixl_rx_buf	*buf;
1530 
1531 	/* Cleanup any existing buffers */
1532 	if (rxr->buffers != NULL) {
1533 		for (int i = 0; i < que->num_rx_desc; i++) {
1534 			buf = &rxr->buffers[i];
1535 
1536 			/* Free buffers and unload dma maps */
1537 			ixl_rx_discard(rxr, i);
1538 
1539 			bus_dmamap_destroy(rxr->htag, buf->hmap);
1540 			bus_dmamap_destroy(rxr->ptag, buf->pmap);
1541 		}
1542 		free(rxr->buffers, M_DEVBUF);
1543 		rxr->buffers = NULL;
1544 	}
1545 
1546 	if (rxr->htag != NULL) {
1547 		bus_dma_tag_destroy(rxr->htag);
1548 		rxr->htag = NULL;
1549 	}
1550 	if (rxr->ptag != NULL) {
1551 		bus_dma_tag_destroy(rxr->ptag);
1552 		rxr->ptag = NULL;
1553 	}
1554 }
1555 
1556 static inline void
1557 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1558 {
1559 
1560 #if defined(INET6) || defined(INET)
1561         /*
1562          * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
1563          * should be computed by hardware. Also it should not have VLAN tag in
1564          * ethernet header.
1565          */
1566         if (rxr->lro_enabled &&
1567             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1568             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1569             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1570                 /*
1571                  * Send to the stack if:
1572                  **  - LRO not enabled, or
1573                  **  - no LRO resources, or
1574                  **  - lro enqueue fails
1575                  */
1576                 if (rxr->lro.lro_cnt != 0)
1577                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1578                                 return;
1579         }
1580 #endif
1581         (*ifp->if_input)(ifp, m);
1582 }
1583 
1584 
1585 static inline void
1586 ixl_rx_discard(struct rx_ring *rxr, int i)
1587 {
1588 	struct ixl_rx_buf	*rbuf;
1589 
1590 	KASSERT(rxr != NULL, ("Receive ring pointer cannot be null"));
1591 	KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_rx_desc"));
1592 
1593 	rbuf = &rxr->buffers[i];
1594 
1595 	/* Free the mbufs in the current chain for the packet */
1596         if (rbuf->fmp != NULL) {
1597 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1598                 m_freem(rbuf->fmp);
1599                 rbuf->fmp = NULL;
1600 	}
1601 
1602 	/*
1603 	 * Free the mbufs for the current descriptor; and let ixl_refresh_mbufs()
1604 	 * assign new mbufs to these.
1605 	 */
1606 	if (rbuf->m_head) {
1607 		bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD);
1608 		bus_dmamap_unload(rxr->htag, rbuf->hmap);
1609 		m_free(rbuf->m_head);
1610 		rbuf->m_head = NULL;
1611 	}
1612 
1613 	if (rbuf->m_pack) {
1614 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1615 		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1616 		m_free(rbuf->m_pack);
1617 		rbuf->m_pack = NULL;
1618 	}
1619 }
1620 
1621 #ifdef RSS
1622 /*
1623 ** i40e_ptype_to_hash: parse the packet type
1624 ** to determine the appropriate hash.
1625 */
1626 static inline int
1627 ixl_ptype_to_hash(u8 ptype)
1628 {
1629         struct i40e_rx_ptype_decoded	decoded;
1630 
1631 	decoded = decode_rx_desc_ptype(ptype);
1632 
1633 	if (!decoded.known)
1634 		return M_HASHTYPE_OPAQUE_HASH;
1635 
1636 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
1637 		return M_HASHTYPE_OPAQUE_HASH;
1638 
1639 	/* Note: anything that gets to this point is IP */
1640         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
1641 		switch (decoded.inner_prot) {
1642 		case I40E_RX_PTYPE_INNER_PROT_TCP:
1643 			return M_HASHTYPE_RSS_TCP_IPV6;
1644 		case I40E_RX_PTYPE_INNER_PROT_UDP:
1645 			return M_HASHTYPE_RSS_UDP_IPV6;
1646 		default:
1647 			return M_HASHTYPE_RSS_IPV6;
1648 		}
1649 	}
1650         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
1651 		switch (decoded.inner_prot) {
1652 		case I40E_RX_PTYPE_INNER_PROT_TCP:
1653 			return M_HASHTYPE_RSS_TCP_IPV4;
1654 		case I40E_RX_PTYPE_INNER_PROT_UDP:
1655 			return M_HASHTYPE_RSS_UDP_IPV4;
1656 		default:
1657 			return M_HASHTYPE_RSS_IPV4;
1658 		}
1659 	}
1660 	/* We should never get here!! */
1661 	return M_HASHTYPE_OPAQUE_HASH;
1662 }
1663 #endif /* RSS */
1664 
1665 /*********************************************************************
1666  *
1667  *  This routine executes in interrupt context. It replenishes
1668  *  the mbufs in the descriptor and sends data which has been
1669  *  dma'ed into host memory to upper layer.
1670  *
1671  *  We loop at most count times if count is > 0, or until done if
1672  *  count < 0.
1673  *
1674  *  Return TRUE for more work, FALSE for all clean.
1675  *********************************************************************/
1676 bool
1677 ixl_rxeof(struct ixl_queue *que, int count)
1678 {
1679 	struct ixl_vsi		*vsi = que->vsi;
1680 	struct rx_ring		*rxr = &que->rxr;
1681 	struct ifnet		*ifp = vsi->ifp;
1682 #if defined(INET6) || defined(INET)
1683 	struct lro_ctrl		*lro = &rxr->lro;
1684 #endif
1685 	int			i, nextp, processed = 0;
1686 	union i40e_rx_desc	*cur;
1687 	struct ixl_rx_buf	*rbuf, *nbuf;
1688 
1689 	IXL_RX_LOCK(rxr);
1690 
1691 #ifdef DEV_NETMAP
1692 	if (netmap_rx_irq(ifp, que->me, &count)) {
1693 		IXL_RX_UNLOCK(rxr);
1694 		return (FALSE);
1695 	}
1696 #endif /* DEV_NETMAP */
1697 
1698 	for (i = rxr->next_check; count != 0;) {
1699 		struct mbuf	*sendmp, *mh, *mp;
1700 		u32		status, error;
1701 		u16		hlen, plen, vtag;
1702 		u64		qword;
1703 		u8		ptype;
1704 		bool		eop;
1705 
1706 		/* Sync the ring. */
1707 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1708 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1709 
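		/*
		 * Quad-word 1 of the write-back descriptor packs the status
		 * bits (DD, EOP, L2TAG1P), the error bits, the header and
		 * payload lengths, and the packet type; unpack them before
		 * testing the DD bit below.
		 */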
1710 		cur = &rxr->base[i];
1711 		qword = le64toh(cur->wb.qword1.status_error_len);
1712 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1713 		    >> I40E_RXD_QW1_STATUS_SHIFT;
1714 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1715 		    >> I40E_RXD_QW1_ERROR_SHIFT;
1716 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1717 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1718 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1719 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1720 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1721 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1722 
1723 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1724 			++rxr->not_done;
1725 			break;
1726 		}
1727 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1728 			break;
1729 
1730 		count--;
1731 		sendmp = NULL;
1732 		nbuf = NULL;
1733 		cur->wb.qword1.status_error_len = 0;
1734 		rbuf = &rxr->buffers[i];
1735 		mh = rbuf->m_head;
1736 		mp = rbuf->m_pack;
1737 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1738 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1739 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1740 		else
1741 			vtag = 0;
1742 
1743 		/* Remove device access to the rx buffers. */
1744 		if (rbuf->m_head != NULL) {
1745 			bus_dmamap_sync(rxr->htag, rbuf->hmap,
1746 			    BUS_DMASYNC_POSTREAD);
1747 			bus_dmamap_unload(rxr->htag, rbuf->hmap);
1748 		}
1749 		if (rbuf->m_pack != NULL) {
1750 			bus_dmamap_sync(rxr->ptag, rbuf->pmap,
1751 			    BUS_DMASYNC_POSTREAD);
1752 			bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1753 		}
1754 
1755 		/*
1756 		** Make sure bad packets are discarded,
1757 		** note that only EOP descriptor has valid
1758 		** error results.
1759 		*/
1760                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1761 			rxr->desc_errs++;
1762 			ixl_rx_discard(rxr, i);
1763 			goto next_desc;
1764 		}
1765 
1766 		/* Prefetch the next buffer */
1767 		if (!eop) {
1768 			nextp = i + 1;
1769 			if (nextp == que->num_rx_desc)
1770 				nextp = 0;
1771 			nbuf = &rxr->buffers[nextp];
1772 			prefetch(nbuf);
1773 		}
1774 
1775 		/*
1776 		** The header mbuf is ONLY used when header
1777 		** split is enabled, otherwise we get normal
1778 		** behavior, ie, both header and payload
1779 		** are DMA'd into the payload buffer.
1780 		**
1781 		** Rather than using the fmp/lmp global pointers
1782 		** we now keep the head of a packet chain in the
1783 		** buffer struct and pass this along from one
1784 		** descriptor to the next, until we get EOP.
1785 		*/
1786 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1787 			if (hlen > IXL_RX_HDR)
1788 				hlen = IXL_RX_HDR;
1789 			mh->m_len = hlen;
1790 			mh->m_flags |= M_PKTHDR;
1791 			mh->m_next = NULL;
1792 			mh->m_pkthdr.len = mh->m_len;
1793 			/* Null buf pointer so it is refreshed */
1794 			rbuf->m_head = NULL;
1795 			/*
1796 			** Check the payload length, this
1797 			** could be zero if its a small
1798 			** packet.
1799 			*/
1800 			if (plen > 0) {
1801 				mp->m_len = plen;
1802 				mp->m_next = NULL;
1803 				mp->m_flags &= ~M_PKTHDR;
1804 				mh->m_next = mp;
1805 				mh->m_pkthdr.len += mp->m_len;
1806 				/* Null buf pointer so it is refreshed */
1807 				rbuf->m_pack = NULL;
1808 				rxr->split++;
1809 			}
1810 			/*
1811 			** Now create the forward
1812 			** chain so when complete
1813 			** we won't have to.
1814 			*/
1815                         if (eop == 0) {
1816 				/* stash the chain head */
1817                                 nbuf->fmp = mh;
1818 				/* Make forward chain */
1819                                 if (plen)
1820                                         mp->m_next = nbuf->m_pack;
1821                                 else
1822                                         mh->m_next = nbuf->m_pack;
1823                         } else {
1824 				/* Singlet, prepare to send */
1825                                 sendmp = mh;
1826                                 if (vtag) {
1827                                         sendmp->m_pkthdr.ether_vtag = vtag;
1828                                         sendmp->m_flags |= M_VLANTAG;
1829                                 }
1830                         }
1831 		} else {
1832 			/*
1833 			** Either no header split, or a
1834 			** secondary piece of a fragmented
1835 			** split packet.
1836 			*/
1837 			mp->m_len = plen;
1838 			/*
1839 			** See if there is a stored chain head that
1840 			** tells us whether this continues an earlier packet.
1841 			*/
1842 			sendmp = rbuf->fmp;
1843 			rbuf->m_pack = rbuf->fmp = NULL;
1844 
1845 			if (sendmp != NULL) /* secondary frag */
1846 				sendmp->m_pkthdr.len += mp->m_len;
1847 			else {
1848 				/* first desc of a non-ps chain */
1849 				sendmp = mp;
1850 				sendmp->m_flags |= M_PKTHDR;
1851 				sendmp->m_pkthdr.len = mp->m_len;
1852 			}
1853 			/* Pass the head pointer on */
1854 			if (eop == 0) {
1855 				nbuf->fmp = sendmp;
1856 				sendmp = NULL;
1857 				mp->m_next = nbuf->m_pack;
1858 			}
1859 		}
1860 		++processed;
1861 		/* Sending this frame? */
1862 		if (eop) {
1863 			sendmp->m_pkthdr.rcvif = ifp;
1864 			/* gather stats */
1865 			rxr->rx_packets++;
1866 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1867 			/* capture data for dynamic ITR adjustment */
1868 			rxr->packets++;
1869 			rxr->bytes += sendmp->m_pkthdr.len;
1870 			/* Set VLAN tag (field only valid in eop desc) */
1871 			if (vtag) {
1872 				sendmp->m_pkthdr.ether_vtag = vtag;
1873 				sendmp->m_flags |= M_VLANTAG;
1874 			}
1875 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1876 				ixl_rx_checksum(sendmp, status, error, ptype);
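			/*
			 * Stamp a flow identifier on the packet: with RSS
			 * the hardware hash plus the decoded packet type
			 * drive the mbuf hash, otherwise the queue's MSI-X
			 * index is reported as an opaque flow ID so the
			 * stack can still keep the flow on a single queue.
			 */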
1877 #ifdef RSS
1878 			sendmp->m_pkthdr.flowid =
1879 			    le32toh(cur->wb.qword0.hi_dword.rss);
1880 			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1881 #else
1882 			sendmp->m_pkthdr.flowid = que->msix;
1883 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1884 #endif
1885 		}
1886 next_desc:
1887 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1888 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1889 
1890 		/* Advance our pointers to the next descriptor. */
1891 		if (++i == que->num_rx_desc)
1892 			i = 0;
1893 
1894 		/* Now send to the stack or do LRO */
1895 		if (sendmp != NULL) {
1896 			rxr->next_check = i;
1897 			IXL_RX_UNLOCK(rxr);
1898 			ixl_rx_input(rxr, ifp, sendmp, ptype);
1899 			IXL_RX_LOCK(rxr);
1900 			/*
1901 			 * Update the loop index in case another ixl_rxeof()
1902 			 * call advanced it while the lock was released
1903 			 */
1904 			i = rxr->next_check;
1905 		}
1906 
1907 		/* Refresh mbufs after every 8 processed descriptors */
1908 		if (processed == 8) {
1909 			ixl_refresh_mbufs(que, i);
1910 			processed = 0;
1911 		}
1912 	}
1913 
1914 	/* Refresh any remaining buf structs */
1915 	if (ixl_rx_unrefreshed(que))
1916 		ixl_refresh_mbufs(que, i);
1917 
1918 	rxr->next_check = i;
1919 
1920 	IXL_RX_UNLOCK(rxr);
1921 
1922 #if defined(INET6) || defined(INET)
1923 	/*
1924 	 * Flush any outstanding LRO work
1925 	 */
1926 #if __FreeBSD_version >= 1100105
1927 	tcp_lro_flush_all(lro);
1928 #else
1929 	struct lro_entry *queued;
1930 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1931 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1932 		tcp_lro_flush(lro, queued);
1933 	}
1934 #endif
1935 #endif /* defined(INET6) || defined(INET) */
1936 
1937 	return (FALSE);
1938 }
1939 
1940 
1941 /*********************************************************************
1942  *
1943  *  Verify that the hardware indicated that the checksum is valid.
1944  *  Inform the stack about the status of the checksum so that the
1945  *  stack doesn't spend time verifying it again.
1946  *
1947  *********************************************************************/
1948 static void
1949 ixl_rx_checksum(struct mbuf *mp, u32 status, u32 error, u8 ptype)
1950 {
1951 	struct i40e_rx_ptype_decoded decoded;
1952 
1953 	decoded = decode_rx_desc_ptype(ptype);
1954 
1955 	/* Errors? */
1956  	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1957 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1958 		mp->m_pkthdr.csum_flags = 0;
1959 		return;
1960 	}
1961 
1962 	/* IPv6 packets with extension headers likely have a bad csum */
1963 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1964 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1965 		if (status &
1966 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1967 			mp->m_pkthdr.csum_flags = 0;
1968 			return;
1969 		}
1970 
1971 
1972 	/* IP Checksum Good */
1973 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1974 	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1975 
1976 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1977 		mp->m_pkthdr.csum_flags |=
1978 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1979 		mp->m_pkthdr.csum_data |= htons(0xffff);
1980 	}
1981 	return;
1982 }
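
/*
 * Result sketch (informational, mirroring the flags set above): a
 * TCP/IPv4 frame that passes both hardware checks leaves this function
 * with
 *
 *	m_pkthdr.csum_flags == (CSUM_IP_CHECKED | CSUM_IP_VALID |
 *	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
 *
 * and 0xffff in m_pkthdr.csum_data, which the inet stack reads as "IP
 * header and L4 checksum already verified", so no software checksum
 * pass is performed.
 */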
1983 
1984 #if __FreeBSD_version >= 1100000
1985 uint64_t
1986 ixl_get_counter(if_t ifp, ift_counter cnt)
1987 {
1988 	struct ixl_vsi *vsi;
1989 
1990 	vsi = if_getsoftc(ifp);
1991 
1992 	switch (cnt) {
1993 	case IFCOUNTER_IPACKETS:
1994 		return (vsi->ipackets);
1995 	case IFCOUNTER_IERRORS:
1996 		return (vsi->ierrors);
1997 	case IFCOUNTER_OPACKETS:
1998 		return (vsi->opackets);
1999 	case IFCOUNTER_OERRORS:
2000 		return (vsi->oerrors);
2001 	case IFCOUNTER_COLLISIONS:
2002 		/* Collisions are by standard impossible in 40G/10G Ethernet */
2003 		return (0);
2004 	case IFCOUNTER_IBYTES:
2005 		return (vsi->ibytes);
2006 	case IFCOUNTER_OBYTES:
2007 		return (vsi->obytes);
2008 	case IFCOUNTER_IMCASTS:
2009 		return (vsi->imcasts);
2010 	case IFCOUNTER_OMCASTS:
2011 		return (vsi->omcasts);
2012 	case IFCOUNTER_IQDROPS:
2013 		return (vsi->iqdrops);
2014 	case IFCOUNTER_OQDROPS:
2015 		return (vsi->oqdrops);
2016 	case IFCOUNTER_NOPROTO:
2017 		return (vsi->noproto);
2018 	default:
2019 		return (if_get_counter_default(ifp, cnt));
2020 	}
2021 }
2022 #endif
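
/*
 * Usage sketch (for reference only; the actual hookup lives in the
 * PF/VF attach path, not in this file): the callback above is
 * registered on the ifnet so the stack pulls statistics straight from
 * the VSI, e.g.
 *
 *	if_setsoftc(ifp, vsi);
 *	if_setgetcounterfn(ifp, ixl_get_counter);
 *
 * Both accessors are standard ifnet(9) KPI calls on FreeBSD 11+.
 */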
2023 
2024 /*
2025  * Set TX and RX ring sizes, using the default for unsupported values
2026  */
2027 void
2028 ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size)
2029 ixl_vsi_setup_rings_size(struct ixl_vsi *vsi, int tx_ring_size, int rx_ring_size)
2030 {
2031 	struct device *dev = vsi->dev;
2032 	if (tx_ring_size < IXL_MIN_RING
2033 	     || tx_ring_size > IXL_MAX_RING
2034 	     || tx_ring_size % IXL_RING_INCREMENT != 0) {
2035 		device_printf(dev, "Invalid tx_ring_size value of %d set!\n",
2036 		    tx_ring_size);
2037 		device_printf(dev, "tx_ring_size must be between %d and %d, "
2038 		    "inclusive, and must be a multiple of %d\n",
2039 		    IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2040 		device_printf(dev, "Using default value of %d instead\n",
2041 		    IXL_DEFAULT_RING);
2042 		vsi->num_tx_desc = IXL_DEFAULT_RING;
2043 	} else
2044 		vsi->num_tx_desc = tx_ring_size;
2045 
2046 	if (rx_ring_size < IXL_MIN_RING
2047 	     || rx_ring_size > IXL_MAX_RING
2048 	     || rx_ring_size % IXL_RING_INCREMENT != 0) {
2049 		device_printf(dev, "Invalid rx_ring_size value of %d set!\n",
2050 		    rx_ring_size);
2051 		device_printf(dev, "rx_ring_size must be between %d and %d, "
2052 		    "inclusive, and must be a multiple of %d\n",
2053 		    IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2054 		device_printf(dev, "Using default value of %d instead\n",
2055 		    IXL_DEFAULT_RING);
2056 		vsi->num_rx_desc = IXL_DEFAULT_RING;
2057 	} else
2058 		vsi->num_rx_desc = rx_ring_size;
2059 
2060 	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
2061 		vsi->num_tx_desc, vsi->num_rx_desc);
2062 
2063 }
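
/*
 * Behavior sketch (hypothetical call, shown only to illustrate the
 * fallback above): a request that fails any of the three checks keeps
 * the other ring's requested size intact,
 *
 *	ixl_vsi_setup_rings_size(vsi, bad_tx_size, good_rx_size);
 *	// -> vsi->num_tx_desc == IXL_DEFAULT_RING
 *	//    vsi->num_rx_desc == good_rx_size
 *
 * where "bad" means outside [IXL_MIN_RING, IXL_MAX_RING] or not a
 * multiple of IXL_RING_INCREMENT.
 */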
2064 
2065 static void
2066 ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
2067 {
2068 	struct i40e_hw *hw = vsi->hw;
2069 	u32	reg, mask;
2070 
2071 	if ((vsi->flags & IXL_FLAGS_IS_VF) != 0) {
2072 		mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
2073 			I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
2074 			I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);
2075 
2076 		reg = I40E_VFINT_DYN_CTLN1(qidx);
2077 	} else {
2078 		mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
2079 				I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
2080 				I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);
2081 
2082 		reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ?
2083 			I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0;
2084 	}
2085 
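	/*
	 * Setting INTENA and SWINT_TRIG together, with the ITR index field
	 * written to its "no ITR" encoding, asks the hardware to fire a
	 * software-triggered interrupt on this queue's vector without
	 * touching the interrupt throttling state; the resulting interrupt
	 * runs the normal queue handler and gives a stalled TX ring a
	 * chance to be cleaned up.
	 */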
2086 	wr32(hw, reg, mask);
2087 }
2088 
2089 int
2090 ixl_queue_hang_check(struct ixl_vsi *vsi)
2091 {
2092 	struct ixl_queue *que = vsi->queues;
2093 	device_t dev = vsi->dev;
2094 	struct tx_ring *txr;
2095 	s32 timer, new_timer;
2096 	int hung = 0;
2097 
2098 	for (int i = 0; i < vsi->num_queues; i++, que++) {
2099 		txr = &que->txr;
2100 		/*
2101 		 * If watchdog_timer equals the default value set by ixl_txeof,
2102 		 * just subtract hz and move on - the queue is most probably
2103 		 * running. Otherwise check the value.
2104 		 */
2105 		if (atomic_cmpset_rel_32(&txr->watchdog_timer,
2106 		    IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) {
2107 			timer = atomic_load_acq_32(&txr->watchdog_timer);
2108 			/*
2109 			 * Again - if the timer was reset to the default value
2110 			 * then the queue is running. Otherwise check if the
2111 			 * watchdog expired and act accordingly.
2112 			 */
2113 
2114 			if (timer > 0 && timer != IXL_WATCHDOG) {
2115 				new_timer = timer - hz;
2116 				if (new_timer <= 0) {
2117 					atomic_store_rel_32(&txr->watchdog_timer, -1);
2118 					device_printf(dev, "WARNING: queue %d "
2119 							"appears to be hung!\n", que->me);
2120 					++hung;
2121 					/* Try to unblock the queue with SW IRQ */
2122 					ixl_queue_sw_irq(vsi, i);
2123 				} else {
2124 					/*
2125 					/*
2126 					 * If this fails, something in the TX path has
2127 					 * updated the watchdog, which means the TX path
2128 					 * is still working and the watchdog doesn't
2129 					 * need to count down.
2130 					 */
2131 							timer, new_timer);
2132 				}
2133 			}
2134 		}
2135 	}
2136 
2137 	return (hung);
2138 }
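
/*
 * Usage sketch (illustrative only; the actual caller lives in the PF/VF
 * driver, not in this file): a periodic watchdog callout could consume
 * the return value along these lines,
 *
 *	hung = ixl_queue_hang_check(vsi);
 *	if (hung == vsi->num_queues)
 *		schedule an interface reinit (driver-specific)
 *
 * so that a single slow queue only gets the software interrupt fired
 * above, while a stall of every queue escalates to a reset.
 */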
2139 
2140