xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision e91afc1cda50cbcb8fffa3f52cc0f8c595a392a3)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 **	    This was separated to allow usage by
38 ** 	    both the BASE and the VF drivers.
39 */
40 
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "ixl.h"
44 
45 /* Local Prototypes */
46 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
48 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49 static int	ixl_tx_setup_offload(struct ixl_queue *,
50 		    struct mbuf *, u32 *, u32 *);
51 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52 
53 static __inline void ixl_rx_discard(struct rx_ring *, int);
54 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55 		    struct mbuf *, u8);
56 
57 /*
58 ** Multiqueue Transmit driver
59 **
60 */
61 int
62 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63 {
64 	struct ixl_vsi		*vsi = ifp->if_softc;
65 	struct ixl_queue	*que;
66 	struct tx_ring		*txr;
67 	int 			err, i;
68 
69 	/* Which queue to use */
70 	if ((m->m_flags & M_FLOWID) != 0)
71 		i = m->m_pkthdr.flowid % vsi->num_queues;
72 	else
73 		i = curcpu % vsi->num_queues;
74 
75 	/* Check for a hung queue and pick alternative */
76 	if (((1 << i) & vsi->active_queues) == 0)
77 		i = ffsl(vsi->active_queues);
78 
79 	que = &vsi->queues[i];
80 	txr = &que->txr;
81 
82 	err = drbr_enqueue(ifp, txr->br, m);
83 	if (err)
84 		return(err);
85 	if (IXL_TX_TRYLOCK(txr)) {
86 		ixl_mq_start_locked(ifp, txr);
87 		IXL_TX_UNLOCK(txr);
88 	} else
89 		taskqueue_enqueue(que->tq, &que->tx_task);
90 
91 	return (0);
92 }
93 
94 int
95 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96 {
97 	struct ixl_queue	*que = txr->que;
98 	struct ixl_vsi		*vsi = que->vsi;
99         struct mbuf		*next;
100         int			err = 0;
101 
102 
103 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104 	    vsi->link_active == 0)
105 		return (ENETDOWN);
106 
107 	/* Process the transmit queue */
108 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109 		if ((err = ixl_xmit(que, &next)) != 0) {
110 			if (next == NULL)
111 				drbr_advance(ifp, txr->br);
112 			else
113 				drbr_putback(ifp, txr->br, next);
114 			break;
115 		}
116 		drbr_advance(ifp, txr->br);
117 		/* Send a copy of the frame to the BPF listener */
118 		ETHER_BPF_MTAP(ifp, next);
119 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120 			break;
121 	}
122 
123 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124 		ixl_txeof(que);
125 
126 	return (err);
127 }
128 
129 /*
130  * Called from a taskqueue to drain queued transmit packets.
131  */
132 void
133 ixl_deferred_mq_start(void *arg, int pending)
134 {
135 	struct ixl_queue	*que = arg;
136         struct tx_ring		*txr = &que->txr;
137 	struct ixl_vsi		*vsi = que->vsi;
138         struct ifnet		*ifp = vsi->ifp;
139 
140 	IXL_TX_LOCK(txr);
141 	if (!drbr_empty(ifp, txr->br))
142 		ixl_mq_start_locked(ifp, txr);
143 	IXL_TX_UNLOCK(txr);
144 }
145 
146 /*
147 ** Flush all queue ring buffers
148 */
149 void
150 ixl_qflush(struct ifnet *ifp)
151 {
152 	struct ixl_vsi	*vsi = ifp->if_softc;
153 
154         for (int i = 0; i < vsi->num_queues; i++) {
155 		struct ixl_queue *que = &vsi->queues[i];
156 		struct tx_ring	*txr = &que->txr;
157 		struct mbuf	*m;
158 		IXL_TX_LOCK(txr);
159 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160 			m_freem(m);
161 		IXL_TX_UNLOCK(txr);
162 	}
163 	if_qflush(ifp);
164 }
165 
166 /*
167 ** Find mbuf chains passed to the driver
168 ** that are 'sparse', using more than 8
169 ** mbufs to deliver an mss-size chunk of data
170 */
171 static inline bool
172 ixl_tso_detect_sparse(struct mbuf *mp)
173 {
174 	struct mbuf	*m;
175 	int		num = 0, mss;
176 	bool		ret = FALSE;
177 
178 	mss = mp->m_pkthdr.tso_segsz;
179 	for (m = mp->m_next; m != NULL; m = m->m_next) {
180 		num++;
181 		mss -= m->m_len;
182 		if (mss < 1)
183 			break;
184 		if (m->m_next == NULL)
185 			break;
186 	}
187 	if (num > IXL_SPARSE_CHAIN)
188 		ret = TRUE;
189 
190 	return (ret);
191 }
192 
193 
194 /*********************************************************************
195  *
196  *  This routine maps the mbufs to tx descriptors, allowing the
197  *  TX engine to transmit the packets.
198  *  	- return 0 on success, positive on failure
199  *
200  **********************************************************************/
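/*
** EOP marks the final descriptor of a frame and RS requests a completion
** report (the head write-back that ixl_get_tx_head() reads), which is how
** ixl_txeof() can tell when the frame has actually been sent.
*/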
201 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
202 
203 static int
204 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205 {
206 	struct ixl_vsi		*vsi = que->vsi;
207 	struct i40e_hw		*hw = vsi->hw;
208 	struct tx_ring		*txr = &que->txr;
209 	struct ixl_tx_buf	*buf;
210 	struct i40e_tx_desc	*txd = NULL;
211 	struct mbuf		*m_head, *m;
212 	int             	i, j, error, nsegs, maxsegs;
213 	int			first, last = 0;
214 	u16			vtag = 0;
215 	u32			cmd, off;
216 	bus_dmamap_t		map;
217 	bus_dma_tag_t		tag;
218 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
219 
220 
221 	cmd = off = 0;
222 	m_head = *m_headp;
223 
224         /*
225          * Capture the first descriptor used; its buffer will record the
226          * index of the EOP descriptor that we ask the hardware to
227          * report back on
228          */
229         first = txr->next_avail;
230 	buf = &txr->buffers[first];
231 	map = buf->map;
232 	tag = txr->tx_tag;
233 	maxsegs = IXL_MAX_TX_SEGS;
234 
235 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236 		/* Use larger mapping for TSO */
237 		tag = txr->tso_tag;
238 		maxsegs = IXL_MAX_TSO_SEGS;
239 		if (ixl_tso_detect_sparse(m_head)) {
240 			m = m_defrag(m_head, M_NOWAIT);
241 			if (m == NULL) {
242 				m_freem(*m_headp);
243 				*m_headp = NULL;
244 				return (ENOBUFS);
245 			}
246 			*m_headp = m;
247 		}
248 	}
249 
250 	/*
251 	 * Map the packet for DMA.
252 	 */
253 	error = bus_dmamap_load_mbuf_sg(tag, map,
254 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
255 
256 	if (error == EFBIG) {
257 		struct mbuf *m;
258 
259 		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
260 		if (m == NULL) {
261 			que->mbuf_defrag_failed++;
262 			m_freem(*m_headp);
263 			*m_headp = NULL;
264 			return (ENOBUFS);
265 		}
266 		*m_headp = m;
267 
268 		/* Try it again */
269 		error = bus_dmamap_load_mbuf_sg(tag, map,
270 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
271 
272 		if (error == ENOMEM) {
273 			que->tx_dma_setup++;
274 			return (error);
275 		} else if (error != 0) {
276 			que->tx_dma_setup++;
277 			m_freem(*m_headp);
278 			*m_headp = NULL;
279 			return (error);
280 		}
281 	} else if (error == ENOMEM) {
282 		que->tx_dma_setup++;
283 		return (error);
284 	} else if (error != 0) {
285 		que->tx_dma_setup++;
286 		m_freem(*m_headp);
287 		*m_headp = NULL;
288 		return (error);
289 	}
290 
291 	/* Make certain there are enough descriptors */
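	/* (Assumption: the two descriptors of slack kept here are reserved
	 *  for a possible TSO context descriptor.) */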
292 	if (nsegs > txr->avail - 2) {
293 		txr->no_desc++;
294 		error = ENOBUFS;
295 		goto xmit_fail;
296 	}
297 	m_head = *m_headp;
298 
299 	/* Set up the TSO/CSUM offload */
300 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
301 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
302 		if (error)
303 			goto xmit_fail;
304 	}
305 
306 	cmd |= I40E_TX_DESC_CMD_ICRC;
307 	/* Grab the VLAN tag */
308 	if (m_head->m_flags & M_VLANTAG) {
309 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
310 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
311 	}
312 
313 	i = txr->next_avail;
314 	for (j = 0; j < nsegs; j++) {
315 		bus_size_t seglen;
316 
317 		buf = &txr->buffers[i];
318 		buf->tag = tag; /* Keep track of the type tag */
319 		txd = &txr->base[i];
320 		seglen = segs[j].ds_len;
321 
322 		txd->buffer_addr = htole64(segs[j].ds_addr);
323 		txd->cmd_type_offset_bsz =
324 		    htole64(I40E_TX_DESC_DTYPE_DATA
325 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
326 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
327 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
328 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
329 
330 		last = i; /* descriptor that will get completion IRQ */
331 
332 		if (++i == que->num_desc)
333 			i = 0;
334 
335 		buf->m_head = NULL;
336 		buf->eop_index = -1;
337 	}
338 	/* Set the last descriptor for report */
339 	txd->cmd_type_offset_bsz |=
340 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
341 	txr->avail -= nsegs;
342 	txr->next_avail = i;
343 
344 	buf->m_head = m_head;
345 	/* Swap the dma map between the first and last descriptor */
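	/*
	 * The loaded map must end up on the EOP buffer so that ixl_txeof()
	 * syncs and unloads the mapping that actually carries the mbuf.
	 */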
346 	txr->buffers[first].map = buf->map;
347 	buf->map = map;
348 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
349 
350         /* Set the index of the descriptor that will be marked done */
351         buf = &txr->buffers[first];
352 	buf->eop_index = last;
353 
354         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
355             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
356 	/*
357 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
358 	 * hardware that this frame is available to transmit.
359 	 */
360 	++txr->total_packets;
361 	wr32(hw, txr->tail, i);
362 
363 	ixl_flush(hw);
364 	/* Mark outstanding work */
365 	if (que->busy == 0)
366 		que->busy = 1;
367 	return (0);
368 
369 xmit_fail:
370 	bus_dmamap_unload(tag, buf->map);
371 	return (error);
372 }
373 
374 
375 /*********************************************************************
376  *
377  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
378  *  the information needed to transmit a packet on the wire. This is
379  *  called only once at attach, setup is done every reset.
380  *
381  **********************************************************************/
382 int
383 ixl_allocate_tx_data(struct ixl_queue *que)
384 {
385 	struct tx_ring		*txr = &que->txr;
386 	struct ixl_vsi		*vsi = que->vsi;
387 	device_t		dev = vsi->dev;
388 	struct ixl_tx_buf	*buf;
389 	int			error = 0;
390 
391 	/*
392 	 * Setup DMA descriptor areas.
393 	 */
394 	if ((error = bus_dma_tag_create(NULL,		/* parent */
395 			       1, 0,			/* alignment, bounds */
396 			       BUS_SPACE_MAXADDR,	/* lowaddr */
397 			       BUS_SPACE_MAXADDR,	/* highaddr */
398 			       NULL, NULL,		/* filter, filterarg */
399 			       IXL_TSO_SIZE,		/* maxsize */
400 			       IXL_MAX_TX_SEGS,		/* nsegments */
401 			       PAGE_SIZE,		/* maxsegsize */
402 			       0,			/* flags */
403 			       NULL,			/* lockfunc */
404 			       NULL,			/* lockfuncarg */
405 			       &txr->tx_tag))) {
406 		device_printf(dev,"Unable to allocate TX DMA tag\n");
407 		goto fail;
408 	}
409 
410 	/* Make a special tag for TSO */
411 	if ((error = bus_dma_tag_create(NULL,		/* parent */
412 			       1, 0,			/* alignment, bounds */
413 			       BUS_SPACE_MAXADDR,	/* lowaddr */
414 			       BUS_SPACE_MAXADDR,	/* highaddr */
415 			       NULL, NULL,		/* filter, filterarg */
416 			       IXL_TSO_SIZE,		/* maxsize */
417 			       IXL_MAX_TSO_SEGS,	/* nsegments */
418 			       PAGE_SIZE,		/* maxsegsize */
419 			       0,			/* flags */
420 			       NULL,			/* lockfunc */
421 			       NULL,			/* lockfuncarg */
422 			       &txr->tso_tag))) {
423 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
424 		goto fail;
425 	}
426 
427 	if (!(txr->buffers =
428 	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
429 	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
430 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
431 		error = ENOMEM;
432 		goto fail;
433 	}
434 
435         /* Create the descriptor buffer default dma maps */
436 	buf = txr->buffers;
437 	for (int i = 0; i < que->num_desc; i++, buf++) {
438 		buf->tag = txr->tx_tag;
439 		error = bus_dmamap_create(buf->tag, 0, &buf->map);
440 		if (error != 0) {
441 			device_printf(dev, "Unable to create TX DMA map\n");
442 			goto fail;
443 		}
444 	}
445 fail:
446 	return (error);
447 }
448 
449 
450 /*********************************************************************
451  *
452  *  (Re)Initialize a queue transmit ring.
453  *	- called by init, it clears the descriptor ring,
454  *	  and frees any stale mbufs
455  *
456  **********************************************************************/
457 void
458 ixl_init_tx_ring(struct ixl_queue *que)
459 {
460 	struct tx_ring *txr = &que->txr;
461 	struct ixl_tx_buf *buf;
462 
463 	/* Clear the old ring contents */
464 	IXL_TX_LOCK(txr);
465 	bzero((void *)txr->base,
466 	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
467 
468 	/* Reset indices */
469 	txr->next_avail = 0;
470 	txr->next_to_clean = 0;
471 
472 #ifdef IXL_FDIR
473 	/* Initialize flow director */
474 	txr->atr_rate = ixl_atr_rate;
475 	txr->atr_count = 0;
476 #endif
477 
478 	/* Free any existing tx mbufs. */
479         buf = txr->buffers;
480 	for (int i = 0; i < que->num_desc; i++, buf++) {
481 		if (buf->m_head != NULL) {
482 			bus_dmamap_sync(buf->tag, buf->map,
483 			    BUS_DMASYNC_POSTWRITE);
484 			bus_dmamap_unload(buf->tag, buf->map);
485 			m_freem(buf->m_head);
486 			buf->m_head = NULL;
487 		}
488 		/* Clear the EOP index */
489 		buf->eop_index = -1;
490         }
491 
492 	/* Set number of descriptors available */
493 	txr->avail = que->num_desc;
494 
495 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
496 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
497 	IXL_TX_UNLOCK(txr);
498 }
499 
500 
501 /*********************************************************************
502  *
503  *  Free transmit ring related data structures.
504  *
505  **********************************************************************/
506 void
507 ixl_free_que_tx(struct ixl_queue *que)
508 {
509 	struct tx_ring *txr = &que->txr;
510 	struct ixl_tx_buf *buf;
511 
512 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
513 
514 	for (int i = 0; i < que->num_desc; i++) {
515 		buf = &txr->buffers[i];
516 		if (buf->m_head != NULL) {
517 			bus_dmamap_sync(buf->tag, buf->map,
518 			    BUS_DMASYNC_POSTWRITE);
519 			bus_dmamap_unload(buf->tag,
520 			    buf->map);
521 			m_freem(buf->m_head);
522 			buf->m_head = NULL;
523 			if (buf->map != NULL) {
524 				bus_dmamap_destroy(buf->tag,
525 				    buf->map);
526 				buf->map = NULL;
527 			}
528 		} else if (buf->map != NULL) {
529 			bus_dmamap_unload(buf->tag,
530 			    buf->map);
531 			bus_dmamap_destroy(buf->tag,
532 			    buf->map);
533 			buf->map = NULL;
534 		}
535 	}
536 	if (txr->br != NULL)
537 		buf_ring_free(txr->br, M_DEVBUF);
538 	if (txr->buffers != NULL) {
539 		free(txr->buffers, M_DEVBUF);
540 		txr->buffers = NULL;
541 	}
542 	if (txr->tx_tag != NULL) {
543 		bus_dma_tag_destroy(txr->tx_tag);
544 		txr->tx_tag = NULL;
545 	}
546 	if (txr->tso_tag != NULL) {
547 		bus_dma_tag_destroy(txr->tso_tag);
548 		txr->tso_tag = NULL;
549 	}
550 
551 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
552 	return;
553 }
554 
555 /*********************************************************************
556  *
557  *  Setup descriptor for hw offloads
558  *
559  **********************************************************************/
560 
561 static int
562 ixl_tx_setup_offload(struct ixl_queue *que,
563     struct mbuf *mp, u32 *cmd, u32 *off)
564 {
565 	struct ether_vlan_header	*eh;
566 #ifdef INET
567 	struct ip			*ip = NULL;
568 #endif
569 	struct tcphdr			*th = NULL;
570 #ifdef INET6
571 	struct ip6_hdr			*ip6;
572 #endif
573 	int				elen, ip_hlen = 0, tcp_hlen;
574 	u16				etype;
575 	u8				ipproto = 0;
576 	bool				tso = FALSE;
577 
578 
579 	/* Set up the TSO context descriptor if required */
580 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
581 		tso = ixl_tso_setup(que, mp);
582 		if (tso)
583 			++que->tso;
584 		else
585 			return (ENXIO);
586 	}
587 
588 	/*
589 	 * Determine where frame payload starts.
590 	 * Jump over vlan headers if already present,
591 	 * helpful for QinQ too.
592 	 */
593 	eh = mtod(mp, struct ether_vlan_header *);
594 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
595 		etype = ntohs(eh->evl_proto);
596 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
597 	} else {
598 		etype = ntohs(eh->evl_encap_proto);
599 		elen = ETHER_HDR_LEN;
600 	}
601 
602 	switch (etype) {
603 #ifdef INET
604 		case ETHERTYPE_IP:
605 			ip = (struct ip *)(mp->m_data + elen);
606 			ip_hlen = ip->ip_hl << 2;
607 			ipproto = ip->ip_p;
608 			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
609 			/* The IP checksum must be recalculated with TSO */
610 			if (tso)
611 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
612 			else
613 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
614 			break;
615 #endif
616 #ifdef INET6
617 		case ETHERTYPE_IPV6:
618 			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
619 			ip_hlen = sizeof(struct ip6_hdr);
620 			ipproto = ip6->ip6_nxt;
621 			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
622 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
623 			break;
624 #endif
625 		default:
626 			break;
627 	}
628 
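	/* MACLEN is programmed in 2-byte words; IPLEN (and L4LEN below) in 4-byte dwords. */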
629 	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
630 	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
631 
632 	switch (ipproto) {
633 		case IPPROTO_TCP:
634 			tcp_hlen = th->th_off << 2;
635 			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
636 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
637 				*off |= (tcp_hlen >> 2) <<
638 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
639 			}
640 #ifdef IXL_FDIR
641 			ixl_atr(que, th, etype);
642 #endif
643 			break;
644 		case IPPROTO_UDP:
645 			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
646 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
647 				*off |= (sizeof(struct udphdr) >> 2) <<
648 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
649 			}
650 			break;
651 
652 		case IPPROTO_SCTP:
653 			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
654 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
655 				*off |= (sizeof(struct sctphdr) >> 2) <<
656 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
657 			}
658 			/* Fall Thru */
659 		default:
660 			break;
661 	}
662 
663         return (0);
664 }
665 
666 
667 /**********************************************************************
668  *
669  *  Setup context for hardware segmentation offload (TSO)
670  *
671  **********************************************************************/
672 static bool
673 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
674 {
675 	struct tx_ring			*txr = &que->txr;
676 	struct i40e_tx_context_desc	*TXD;
677 	struct ixl_tx_buf		*buf;
678 	u32				cmd, mss, type, tsolen;
679 	u16				etype;
680 	int				idx, elen, ip_hlen, tcp_hlen;
681 	struct ether_vlan_header	*eh;
682 #ifdef INET
683 	struct ip			*ip;
684 #endif
685 #ifdef INET6
686 	struct ip6_hdr			*ip6;
687 #endif
688 #if defined(INET6) || defined(INET)
689 	struct tcphdr			*th;
690 #endif
691 	u64				type_cmd_tso_mss;
692 
693 	/*
694 	 * Determine where frame payload starts.
695 	 * Jump over vlan headers if already present
696 	 */
697 	eh = mtod(mp, struct ether_vlan_header *);
698 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
699 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
700 		etype = eh->evl_proto;
701 	} else {
702 		elen = ETHER_HDR_LEN;
703 		etype = eh->evl_encap_proto;
704 	}
705 
706         switch (ntohs(etype)) {
707 #ifdef INET6
708 	case ETHERTYPE_IPV6:
709 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
710 		if (ip6->ip6_nxt != IPPROTO_TCP)
711 			return (FALSE);
712 		ip_hlen = sizeof(struct ip6_hdr);
713 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
714 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
715 		tcp_hlen = th->th_off << 2;
716 		break;
717 #endif
718 #ifdef INET
719 	case ETHERTYPE_IP:
720 		ip = (struct ip *)(mp->m_data + elen);
721 		if (ip->ip_p != IPPROTO_TCP)
722 			return (FALSE);
723 		ip->ip_sum = 0;
724 		ip_hlen = ip->ip_hl << 2;
725 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
726 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
727 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
728 		tcp_hlen = th->th_off << 2;
729 		break;
730 #endif
731 	default:
732 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
733 		    __func__, ntohs(etype));
734 		return FALSE;
735         }
736 
737         /* Ensure we have at least the IP+TCP header in the first mbuf. */
738         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
739 		return FALSE;
740 
741 	idx = txr->next_avail;
742 	buf = &txr->buffers[idx];
743 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
744 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
745 
746 	type = I40E_TX_DESC_DTYPE_CONTEXT;
747 	cmd = I40E_TX_CTX_DESC_TSO;
748 	mss = mp->m_pkthdr.tso_segsz;
749 
750 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
751 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
752 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
753 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
754 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
755 
756 	TXD->tunneling_params = htole32(0);
757 	buf->m_head = NULL;
758 	buf->eop_index = -1;
759 
760 	if (++idx == que->num_desc)
761 		idx = 0;
762 
763 	txr->avail--;
764 	txr->next_avail = idx;
765 
766 	return TRUE;
767 }
768 
769 /*
770 ** ixl_get_tx_head - Retrieve the value from the
771 **    location the HW records its HEAD index
772 */
773 static inline u32
774 ixl_get_tx_head(struct ixl_queue *que)
775 {
776 	struct tx_ring  *txr = &que->txr;
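	/* With head write-back the HW stores its HEAD index in the slot just
	 * past the last descriptor of the ring. */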
777 	void *head = &txr->base[que->num_desc];
778 	return LE32_TO_CPU(*(volatile __le32 *)head);
779 }
780 
781 /**********************************************************************
782  *
783  *  Examine each tx_buffer in the used queue. If the hardware is done
784  *  processing the packet then free associated resources. The
785  *  tx_buffer is put back on the free queue.
786  *
787  **********************************************************************/
788 bool
789 ixl_txeof(struct ixl_queue *que)
790 {
791 	struct tx_ring		*txr = &que->txr;
792 	u32			first, last, head, done, processed;
793 	struct ixl_tx_buf	*buf;
794 	struct i40e_tx_desc	*tx_desc, *eop_desc;
795 
796 
797 	mtx_assert(&txr->mtx, MA_OWNED);
798 
799 
800 	/* These are not the descriptors you seek, move along :) */
801 	if (txr->avail == que->num_desc) {
802 		que->busy = 0;
803 		return FALSE;
804 	}
805 
806 	processed = 0;
807 	first = txr->next_to_clean;
808 	buf = &txr->buffers[first];
809 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
810 	last = buf->eop_index;
811 	if (last == -1)
812 		return FALSE;
813 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
814 
815 	/* Get the Head WB value */
816 	head = ixl_get_tx_head(que);
817 
818 	/*
819 	** Get the index of the first descriptor
820 	** BEYOND the EOP and call that 'done'.
821 	** I do this so the comparison in the
822 	** inner while loop below can be simple
823 	*/
824 	if (++last == que->num_desc) last = 0;
825 	done = last;
826 
827         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
828             BUS_DMASYNC_POSTREAD);
829 	/*
830 	** The HEAD index of the ring is written in a
831 	** defined location; this, rather than a done bit,
832 	** is what is used to keep track of what must be
833 	** 'cleaned'.
834 	*/
835 	while (first != head) {
836 		/* We clean the range of the packet */
837 		while (first != done) {
838 			++txr->avail;
839 			++processed;
840 
841 			if (buf->m_head) {
842 				txr->bytes += /* for ITR adjustment */
843 				    buf->m_head->m_pkthdr.len;
844 				txr->tx_bytes += /* for TX stats */
845 				    buf->m_head->m_pkthdr.len;
846 				bus_dmamap_sync(buf->tag,
847 				    buf->map,
848 				    BUS_DMASYNC_POSTWRITE);
849 				bus_dmamap_unload(buf->tag,
850 				    buf->map);
851 				m_freem(buf->m_head);
852 				buf->m_head = NULL;
853 				buf->map = NULL;
854 			}
855 			buf->eop_index = -1;
856 
857 			if (++first == que->num_desc)
858 				first = 0;
859 
860 			buf = &txr->buffers[first];
861 			tx_desc = &txr->base[first];
862 		}
863 		++txr->packets;
864 		/* See if there is more work now */
865 		last = buf->eop_index;
866 		if (last != -1) {
867 			eop_desc = &txr->base[last];
868 			/* Get next done point */
869 			if (++last == que->num_desc) last = 0;
870 			done = last;
871 		} else
872 			break;
873 	}
874 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
875 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
876 
877 	txr->next_to_clean = first;
878 
879 
880 	/*
881 	** Hang detection: we know there's work outstanding,
882 	** or the first return above would have been taken, so
883 	** indicate an unsuccessful pass. In local_timer, if
884 	** the count grows too large the queue will be
885 	** considered hung; if anything has been cleaned
886 	** then reset the state.
888 	*/
889 	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
890 		++que->busy;
891 
892 	if (processed)
893 		que->busy = 1; /* Note this turns off HUNG */
894 
895 	/*
896 	 * If there are no pending descriptors, clear the timeout.
897 	 */
898 	if (txr->avail == que->num_desc) {
899 		que->busy = 0;
900 		return FALSE;
901 	}
902 
903 	return TRUE;
904 }
905 
906 /*********************************************************************
907  *
908  *  Refresh mbuf buffers for RX descriptor rings
909  *   - now keeps its own state, so discards due to resource
910  *     exhaustion are unnecessary; if an mbuf cannot be obtained
911  *     it just returns, keeping its placeholder, and can simply
912  *     be called again to retry.
913  *
914  **********************************************************************/
915 static void
916 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
917 {
918 	struct ixl_vsi		*vsi = que->vsi;
919 	struct rx_ring		*rxr = &que->rxr;
920 	bus_dma_segment_t	hseg[1];
921 	bus_dma_segment_t	pseg[1];
922 	struct ixl_rx_buf	*buf;
923 	struct mbuf		*mh, *mp;
924 	int			i, j, nsegs, error;
925 	bool			refreshed = FALSE;
926 
927 	i = j = rxr->next_refresh;
928 	/* Control the loop with one beyond */
929 	if (++j == que->num_desc)
930 		j = 0;
931 
932 	while (j != limit) {
933 		buf = &rxr->buffers[i];
934 		if (rxr->hdr_split == FALSE)
935 			goto no_split;
936 
937 		if (buf->m_head == NULL) {
938 			mh = m_gethdr(M_NOWAIT, MT_DATA);
939 			if (mh == NULL)
940 				goto update;
941 		} else
942 			mh = buf->m_head;
943 
944 		mh->m_pkthdr.len = mh->m_len = MHLEN;
946 		mh->m_flags |= M_PKTHDR;
947 		/* Get the memory mapping */
948 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
949 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
950 		if (error != 0) {
951 			printf("Refresh mbufs: hdr dmamap load"
952 			    " failure - %d\n", error);
953 			m_free(mh);
954 			buf->m_head = NULL;
955 			goto update;
956 		}
957 		buf->m_head = mh;
958 		bus_dmamap_sync(rxr->htag, buf->hmap,
959 		    BUS_DMASYNC_PREREAD);
960 		rxr->base[i].read.hdr_addr =
961 		   htole64(hseg[0].ds_addr);
962 
963 no_split:
964 		if (buf->m_pack == NULL) {
965 			mp = m_getjcl(M_NOWAIT, MT_DATA,
966 			    M_PKTHDR, rxr->mbuf_sz);
967 			if (mp == NULL)
968 				goto update;
969 		} else
970 			mp = buf->m_pack;
971 
972 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
973 		/* Get the memory mapping */
974 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
975 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
976 		if (error != 0) {
977 			printf("Refresh mbufs: payload dmamap load"
978 			    " failure - %d\n", error);
979 			m_free(mp);
980 			buf->m_pack = NULL;
981 			goto update;
982 		}
983 		buf->m_pack = mp;
984 		bus_dmamap_sync(rxr->ptag, buf->pmap,
985 		    BUS_DMASYNC_PREREAD);
986 		rxr->base[i].read.pkt_addr =
987 		   htole64(pseg[0].ds_addr);
988 		/* Used only when doing header split */
989 		rxr->base[i].read.hdr_addr = 0;
990 
991 		refreshed = TRUE;
992 		/* Next is precalculated */
993 		i = j;
994 		rxr->next_refresh = i;
995 		if (++j == que->num_desc)
996 			j = 0;
997 	}
998 update:
999 	if (refreshed) /* Update hardware tail index */
1000 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1001 	return;
1002 }
1003 
1004 
1005 /*********************************************************************
1006  *
1007  *  Allocate memory for rx_buffer structures. Since we use one
1008  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1009  *  that we'll need is equal to the number of receive descriptors
1010  *  that we've defined.
1011  *
1012  **********************************************************************/
1013 int
1014 ixl_allocate_rx_data(struct ixl_queue *que)
1015 {
1016 	struct rx_ring		*rxr = &que->rxr;
1017 	struct ixl_vsi		*vsi = que->vsi;
1018 	device_t 		dev = vsi->dev;
1019 	struct ixl_rx_buf 	*buf;
1020 	int             	i, bsize, error;
1021 
1022 	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1023 	if (!(rxr->buffers =
1024 	    (struct ixl_rx_buf *) malloc(bsize,
1025 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1026 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1027 		error = ENOMEM;
1028 		return (error);
1029 	}
1030 
1031 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1032 				   1, 0,	/* alignment, bounds */
1033 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1034 				   BUS_SPACE_MAXADDR,	/* highaddr */
1035 				   NULL, NULL,		/* filter, filterarg */
1036 				   MSIZE,		/* maxsize */
1037 				   1,			/* nsegments */
1038 				   MSIZE,		/* maxsegsize */
1039 				   0,			/* flags */
1040 				   NULL,		/* lockfunc */
1041 				   NULL,		/* lockfuncarg */
1042 				   &rxr->htag))) {
1043 		device_printf(dev, "Unable to create RX DMA htag\n");
1044 		return (error);
1045 	}
1046 
1047 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1048 				   1, 0,	/* alignment, bounds */
1049 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1050 				   BUS_SPACE_MAXADDR,	/* highaddr */
1051 				   NULL, NULL,		/* filter, filterarg */
1052 				   MJUM16BYTES,		/* maxsize */
1053 				   1,			/* nsegments */
1054 				   MJUM16BYTES,		/* maxsegsize */
1055 				   0,			/* flags */
1056 				   NULL,		/* lockfunc */
1057 				   NULL,		/* lockfuncarg */
1058 				   &rxr->ptag))) {
1059 		device_printf(dev, "Unable to create RX DMA ptag\n");
1060 		return (error);
1061 	}
1062 
1063 	for (i = 0; i < que->num_desc; i++) {
1064 		buf = &rxr->buffers[i];
1065 		error = bus_dmamap_create(rxr->htag,
1066 		    BUS_DMA_NOWAIT, &buf->hmap);
1067 		if (error) {
1068 			device_printf(dev, "Unable to create RX head map\n");
1069 			break;
1070 		}
1071 		error = bus_dmamap_create(rxr->ptag,
1072 		    BUS_DMA_NOWAIT, &buf->pmap);
1073 		if (error) {
1074 			device_printf(dev, "Unable to create RX pkt map\n");
1075 			break;
1076 		}
1077 	}
1078 
1079 	return (error);
1080 }
1081 
1082 
1083 /*********************************************************************
1084  *
1085  *  (Re)Initialize the queue receive ring and its buffers.
1086  *
1087  **********************************************************************/
1088 int
1089 ixl_init_rx_ring(struct ixl_queue *que)
1090 {
1091 	struct	rx_ring 	*rxr = &que->rxr;
1092 	struct ixl_vsi		*vsi = que->vsi;
1093 #if defined(INET6) || defined(INET)
1094 	struct ifnet		*ifp = vsi->ifp;
1095 	struct lro_ctrl		*lro = &rxr->lro;
1096 #endif
1097 	struct ixl_rx_buf	*buf;
1098 	bus_dma_segment_t	pseg[1], hseg[1];
1099 	int			rsize, nsegs, error = 0;
1100 
1101 	IXL_RX_LOCK(rxr);
1102 	/* Clear the ring contents */
1103 	rsize = roundup2(que->num_desc *
1104 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1105 	bzero((void *)rxr->base, rsize);
1106 	/* Cleanup any existing buffers */
1107 	for (int i = 0; i < que->num_desc; i++) {
1108 		buf = &rxr->buffers[i];
1109 		if (buf->m_head != NULL) {
1110 			bus_dmamap_sync(rxr->htag, buf->hmap,
1111 			    BUS_DMASYNC_POSTREAD);
1112 			bus_dmamap_unload(rxr->htag, buf->hmap);
1113 			buf->m_head->m_flags |= M_PKTHDR;
1114 			m_freem(buf->m_head);
1115 		}
1116 		if (buf->m_pack != NULL) {
1117 			bus_dmamap_sync(rxr->ptag, buf->pmap,
1118 			    BUS_DMASYNC_POSTREAD);
1119 			bus_dmamap_unload(rxr->ptag, buf->pmap);
1120 			buf->m_pack->m_flags |= M_PKTHDR;
1121 			m_freem(buf->m_pack);
1122 		}
1123 		buf->m_head = NULL;
1124 		buf->m_pack = NULL;
1125 	}
1126 
1127 	/* header split is off */
1128 	rxr->hdr_split = FALSE;
1129 
1130 	/* Now replenish the mbufs */
1131 	for (int j = 0; j != que->num_desc; ++j) {
1132 		struct mbuf	*mh, *mp;
1133 
1134 		buf = &rxr->buffers[j];
1135 		/*
1136 		** Don't allocate mbufs if not
1137 		** doing header split, it's wasteful
1138 		*/
1139 		if (rxr->hdr_split == FALSE)
1140 			goto skip_head;
1141 
1142 		/* First the header */
1143 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1144 		if (buf->m_head == NULL) {
1145 			error = ENOBUFS;
1146 			goto fail;
1147 		}
1148 		m_adj(buf->m_head, ETHER_ALIGN);
1149 		mh = buf->m_head;
1150 		mh->m_len = mh->m_pkthdr.len = MHLEN;
1151 		mh->m_flags |= M_PKTHDR;
1152 		/* Get the memory mapping */
1153 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1154 		    buf->hmap, buf->m_head, hseg,
1155 		    &nsegs, BUS_DMA_NOWAIT);
1156 		if (error != 0) /* Nothing elegant to do here */
1157 			goto fail;
1158 		bus_dmamap_sync(rxr->htag,
1159 		    buf->hmap, BUS_DMASYNC_PREREAD);
1160 		/* Update descriptor */
1161 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1162 
1163 skip_head:
1164 		/* Now the payload cluster */
1165 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1166 		    M_PKTHDR, rxr->mbuf_sz);
1167 		if (buf->m_pack == NULL) {
1168 			error = ENOBUFS;
1169                         goto fail;
1170 		}
1171 		mp = buf->m_pack;
1172 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1173 		/* Get the memory mapping */
1174 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1175 		    buf->pmap, mp, pseg,
1176 		    &nsegs, BUS_DMA_NOWAIT);
1177 		if (error != 0)
1178                         goto fail;
1179 		bus_dmamap_sync(rxr->ptag,
1180 		    buf->pmap, BUS_DMASYNC_PREREAD);
1181 		/* Update descriptor */
1182 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1183 		rxr->base[j].read.hdr_addr = 0;
1184 	}
1185 
1186 
1187 	/* Setup our descriptor indices */
1188 	rxr->next_check = 0;
1189 	rxr->next_refresh = 0;
1190 	rxr->lro_enabled = FALSE;
1191 	rxr->split = 0;
1192 	rxr->bytes = 0;
1193 	rxr->discard = FALSE;
1194 
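	/* Hand the whole ring to the hardware: tail points at the last descriptor. */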
1195 	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1196 	ixl_flush(vsi->hw);
1197 
1198 #if defined(INET6) || defined(INET)
1199 	/*
1200 	** Now set up the LRO interface:
1201 	*/
1202 	if (ifp->if_capenable & IFCAP_LRO) {
1203 		int err = tcp_lro_init(lro);
1204 		if (err) {
1205 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1206 			goto fail;
1207 		}
1208 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1209 		rxr->lro_enabled = TRUE;
1210 		lro->ifp = vsi->ifp;
1211 	}
1212 #endif
1213 
1214 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1215 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1216 
1217 fail:
1218 	IXL_RX_UNLOCK(rxr);
1219 	return (error);
1220 }
1221 
1222 
1223 /*********************************************************************
1224  *
1225  *  Free station receive ring data structures
1226  *
1227  **********************************************************************/
1228 void
1229 ixl_free_que_rx(struct ixl_queue *que)
1230 {
1231 	struct rx_ring		*rxr = &que->rxr;
1232 	struct ixl_rx_buf	*buf;
1233 
1234 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1235 
1236 	/* Cleanup any existing buffers */
1237 	if (rxr->buffers != NULL) {
1238 		for (int i = 0; i < que->num_desc; i++) {
1239 			buf = &rxr->buffers[i];
1240 			if (buf->m_head != NULL) {
1241 				bus_dmamap_sync(rxr->htag, buf->hmap,
1242 				    BUS_DMASYNC_POSTREAD);
1243 				bus_dmamap_unload(rxr->htag, buf->hmap);
1244 				buf->m_head->m_flags |= M_PKTHDR;
1245 				m_freem(buf->m_head);
1246 			}
1247 			if (buf->m_pack != NULL) {
1248 				bus_dmamap_sync(rxr->ptag, buf->pmap,
1249 				    BUS_DMASYNC_POSTREAD);
1250 				bus_dmamap_unload(rxr->ptag, buf->pmap);
1251 				buf->m_pack->m_flags |= M_PKTHDR;
1252 				m_freem(buf->m_pack);
1253 			}
1254 			buf->m_head = NULL;
1255 			buf->m_pack = NULL;
1256 			if (buf->hmap != NULL) {
1257 				bus_dmamap_destroy(rxr->htag, buf->hmap);
1258 				buf->hmap = NULL;
1259 			}
1260 			if (buf->pmap != NULL) {
1261 				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1262 				buf->pmap = NULL;
1263 			}
1264 		}
1265 		if (rxr->buffers != NULL) {
1266 			free(rxr->buffers, M_DEVBUF);
1267 			rxr->buffers = NULL;
1268 		}
1269 	}
1270 
1271 	if (rxr->htag != NULL) {
1272 		bus_dma_tag_destroy(rxr->htag);
1273 		rxr->htag = NULL;
1274 	}
1275 	if (rxr->ptag != NULL) {
1276 		bus_dma_tag_destroy(rxr->ptag);
1277 		rxr->ptag = NULL;
1278 	}
1279 
1280 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1281 	return;
1282 }
1283 
1284 static __inline void
1285 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1286 {
1287 
1288 #if defined(INET6) || defined(INET)
1289         /*
1290          * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
1291          * At the moment LRO is only done for IPv4/TCP packets whose TCP
1292          * checksum has been verified by hardware, and which carry no VLAN
1293          * tag in the Ethernet header (hardware tag stripping enabled).
1294         if (rxr->lro_enabled &&
1295             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1296             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1297             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1298                 /*
1299                  * Send to the stack if:
1300                  **  - LRO not enabled, or
1301                  **  - no LRO resources, or
1302                  **  - lro enqueue fails
1303                  */
1304                 if (rxr->lro.lro_cnt != 0)
1305                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1306                                 return;
1307         }
1308 #endif
1309 	IXL_RX_UNLOCK(rxr);
1310         (*ifp->if_input)(ifp, m);
1311 	IXL_RX_LOCK(rxr);
1312 }
1313 
1314 
1315 static __inline void
1316 ixl_rx_discard(struct rx_ring *rxr, int i)
1317 {
1318 	struct ixl_rx_buf	*rbuf;
1319 
1320 	rbuf = &rxr->buffers[i];
1321 
1322         if (rbuf->fmp != NULL) {/* Partial chain ? */
1323 		rbuf->fmp->m_flags |= M_PKTHDR;
1324                 m_freem(rbuf->fmp);
1325                 rbuf->fmp = NULL;
1326 	}
1327 
1328 	/*
1329 	** With advanced descriptors the writeback
1330 	** clobbers the buffer addrs, so it's easier
1331 	** to just free the existing mbufs and take
1332 	** the normal refresh path to get new buffers
1333 	** and mapping.
1334 	*/
1335 	if (rbuf->m_head) {
1336 		m_free(rbuf->m_head);
1337 		rbuf->m_head = NULL;
1338 	}
1339 
1340 	if (rbuf->m_pack) {
1341 		m_free(rbuf->m_pack);
1342 		rbuf->m_pack = NULL;
1343 	}
1344 
1345 	return;
1346 }
1347 
1348 
1349 /*********************************************************************
1350  *
1351  *  This routine executes in interrupt context. It replenishes
1352  *  the mbufs in the descriptor and sends data which has been
1353  *  dma'ed into host memory to upper layer.
1354  *
1355  *  We loop at most count times if count is > 0, or until done if
1356  *  count < 0.
1357  *
1358  *  Return TRUE for more work, FALSE for all clean.
1359  *********************************************************************/
1360 bool
1361 ixl_rxeof(struct ixl_queue *que, int count)
1362 {
1363 	struct ixl_vsi		*vsi = que->vsi;
1364 	struct rx_ring		*rxr = &que->rxr;
1365 	struct ifnet		*ifp = vsi->ifp;
1366 #if defined(INET6) || defined(INET)
1367 	struct lro_ctrl		*lro = &rxr->lro;
1368 	struct lro_entry	*queued;
1369 #endif
1370 	int			i, nextp, processed = 0;
1371 	union i40e_rx_desc	*cur;
1372 	struct ixl_rx_buf	*rbuf, *nbuf;
1373 
1374 
1375 	IXL_RX_LOCK(rxr);
1376 
1377 
1378 	for (i = rxr->next_check; count != 0;) {
1379 		struct mbuf	*sendmp, *mh, *mp;
1380 		u32		rsc, status, error;
1381 		u16		hlen, plen, vtag;
1382 		u64		qword;
1383 		u8		ptype;
1384 		bool		eop;
1385 
1386 		/* Sync the ring. */
1387 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1388 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1389 
1390 		cur = &rxr->base[i];
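		/*
		** qword1 of the write-back descriptor packs status, error,
		** buffer lengths and packet type; pull each field out with
		** its mask and shift.
		*/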
1391 		qword = le64toh(cur->wb.qword1.status_error_len);
1392 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1393 		    >> I40E_RXD_QW1_STATUS_SHIFT;
1394 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1395 		    >> I40E_RXD_QW1_ERROR_SHIFT;
1396 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1397 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1398 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1399 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1400 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1401 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1402 
1403 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1404 			++rxr->not_done;
1405 			break;
1406 		}
1407 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1408 			break;
1409 
1410 		count--;
1411 		sendmp = NULL;
1412 		nbuf = NULL;
1413 		rsc = 0;
1414 		cur->wb.qword1.status_error_len = 0;
1415 		rbuf = &rxr->buffers[i];
1416 		mh = rbuf->m_head;
1417 		mp = rbuf->m_pack;
1418 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1419 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1420 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1421 		else
1422 			vtag = 0;
1423 
1424 		/*
1425 		** Make sure bad packets are discarded,
1426 		** note that only EOP descriptor has valid
1427 		** error results.
1428 		*/
1429                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1430 			rxr->discarded++;
1431 			ixl_rx_discard(rxr, i);
1432 			goto next_desc;
1433 		}
1434 
1435 		/* Prefetch the next buffer */
1436 		if (!eop) {
1437 			nextp = i + 1;
1438 			if (nextp == que->num_desc)
1439 				nextp = 0;
1440 			nbuf = &rxr->buffers[nextp];
1441 			prefetch(nbuf);
1442 		}
1443 
1444 		/*
1445 		** The header mbuf is ONLY used when header
1446 		** split is enabled, otherwise we get normal
1447 		** behavior, ie, both header and payload
1448 		** are DMA'd into the payload buffer.
1449 		**
1450 		** Rather than using the fmp/lmp global pointers
1451 		** we now keep the head of a packet chain in the
1452 		** buffer struct and pass this along from one
1453 		** descriptor to the next, until we get EOP.
1454 		*/
1455 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1456 			if (hlen > IXL_RX_HDR)
1457 				hlen = IXL_RX_HDR;
1458 			mh->m_len = hlen;
1459 			mh->m_flags |= M_PKTHDR;
1460 			mh->m_next = NULL;
1461 			mh->m_pkthdr.len = mh->m_len;
1462 			/* Null buf pointer so it is refreshed */
1463 			rbuf->m_head = NULL;
1464 			/*
1465 			** Check the payload length, this
1466 			** could be zero if it's a small
1467 			** packet.
1468 			*/
1469 			if (plen > 0) {
1470 				mp->m_len = plen;
1471 				mp->m_next = NULL;
1472 				mp->m_flags &= ~M_PKTHDR;
1473 				mh->m_next = mp;
1474 				mh->m_pkthdr.len += mp->m_len;
1475 				/* Null buf pointer so it is refreshed */
1476 				rbuf->m_pack = NULL;
1477 				rxr->split++;
1478 			}
1479 			/*
1480 			** Now create the forward
1481 			** chain so when complete
1482 			** we won't have to.
1483 			*/
1484                         if (eop == 0) {
1485 				/* stash the chain head */
1486                                 nbuf->fmp = mh;
1487 				/* Make forward chain */
1488                                 if (plen)
1489                                         mp->m_next = nbuf->m_pack;
1490                                 else
1491                                         mh->m_next = nbuf->m_pack;
1492                         } else {
1493 				/* Singlet, prepare to send */
1494                                 sendmp = mh;
1495                                 if (vtag) {
1496                                         sendmp->m_pkthdr.ether_vtag = vtag;
1497                                         sendmp->m_flags |= M_VLANTAG;
1498                                 }
1499                         }
1500 		} else {
1501 			/*
1502 			** Either no header split, or a
1503 			** secondary piece of a fragmented
1504 			** split packet.
1505 			*/
1506 			mp->m_len = plen;
1507 			/*
1508 			** See if there is a stored head
1509 			** that determines what we are
1510 			*/
1511 			sendmp = rbuf->fmp;
1512 			rbuf->m_pack = rbuf->fmp = NULL;
1513 
1514 			if (sendmp != NULL) /* secondary frag */
1515 				sendmp->m_pkthdr.len += mp->m_len;
1516 			else {
1517 				/* first desc of a non-ps chain */
1518 				sendmp = mp;
1519 				sendmp->m_flags |= M_PKTHDR;
1520 				sendmp->m_pkthdr.len = mp->m_len;
1521 				if (vtag) {
1522 					sendmp->m_pkthdr.ether_vtag = vtag;
1523 					sendmp->m_flags |= M_VLANTAG;
1524 				}
1525                         }
1526 			/* Pass the head pointer on */
1527 			if (eop == 0) {
1528 				nbuf->fmp = sendmp;
1529 				sendmp = NULL;
1530 				mp->m_next = nbuf->m_pack;
1531 			}
1532 		}
1533 		++processed;
1534 		/* Sending this frame? */
1535 		if (eop) {
1536 			sendmp->m_pkthdr.rcvif = ifp;
1537 			/* gather stats */
1538 			rxr->rx_packets++;
1539 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1540 			/* capture data for dynamic ITR adjustment */
1541 			rxr->packets++;
1542 			rxr->bytes += sendmp->m_pkthdr.len;
1543 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1544 				ixl_rx_checksum(sendmp, status, error, ptype);
1545 			sendmp->m_pkthdr.flowid = que->msix;
1546 			sendmp->m_flags |= M_FLOWID;
1547 		}
1548 next_desc:
1549 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1550 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1551 
1552 		/* Advance our pointers to the next descriptor. */
1553 		if (++i == que->num_desc)
1554 			i = 0;
1555 
1556 		/* Now send to the stack or do LRO */
1557 		if (sendmp != NULL) {
1558 			rxr->next_check = i;
1559 			ixl_rx_input(rxr, ifp, sendmp, ptype);
1560 			i = rxr->next_check;
1561 		}
1562 
1563                /* Every 8 descriptors we go to refresh mbufs */
1564 		if (processed == 8) {
1565 			ixl_refresh_mbufs(que, i);
1566 			processed = 0;
1567 		}
1568 	}
1569 
1570 	/* Refresh any remaining buf structs */
1571 	if (ixl_rx_unrefreshed(que))
1572 		ixl_refresh_mbufs(que, i);
1573 
1574 	rxr->next_check = i;
1575 
1576 #if defined(INET6) || defined(INET)
1577 	/*
1578 	 * Flush any outstanding LRO work
1579 	 */
1580 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1581 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1582 		tcp_lro_flush(lro, queued);
1583 	}
1584 #endif
1585 
1586 	IXL_RX_UNLOCK(rxr);
1587 	return (FALSE);
1588 }
1589 
1590 
1591 /*********************************************************************
1592  *
1593  *  Verify that the hardware indicated that the checksum is valid.
1594  *  Inform the stack about the status of checksum so that stack
1595  *  doesn't spend time verifying the checksum.
1596  *
1597  *********************************************************************/
1598 static void
1599 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1600 {
1601 	struct i40e_rx_ptype_decoded decoded;
1602 
1603 	decoded = decode_rx_desc_ptype(ptype);
1604 
1605 	/* Errors? */
1606  	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1607 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1608 		mp->m_pkthdr.csum_flags = 0;
1609 		return;
1610 	}
1611 
1612 	/* IPv6 with extension headers likely have bad csum */
1613 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1614 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1615 		if (status &
1616 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1617 			mp->m_pkthdr.csum_flags = 0;
1618 			return;
1619 		}
1620 
1621 
1622 	/* IP Checksum Good */
1623 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1624 	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1625 
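	/*
	** L3L4P set means the hardware also checked the L4 checksum;
	** CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data 0xffff tells
	** the stack it verified good.
	*/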
1626 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1627 		mp->m_pkthdr.csum_flags |=
1628 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1629 		mp->m_pkthdr.csum_data |= htons(0xffff);
1630 	}
1631 	return;
1632 }
1633 
1634 #if __FreeBSD_version >= 1100000
1635 uint64_t
1636 ixl_get_counter(if_t ifp, ift_counter cnt)
1637 {
1638 	struct ixl_vsi *vsi;
1639 
1640 	vsi = if_getsoftc(ifp);
1641 
1642 	switch (cnt) {
1643 	case IFCOUNTER_IPACKETS:
1644 		return (vsi->ipackets);
1645 	case IFCOUNTER_IERRORS:
1646 		return (vsi->ierrors);
1647 	case IFCOUNTER_OPACKETS:
1648 		return (vsi->opackets);
1649 	case IFCOUNTER_OERRORS:
1650 		return (vsi->oerrors);
1651 	case IFCOUNTER_COLLISIONS:
1652 		/* Collisions are by standard impossible in 40G/10G Ethernet */
1653 		return (0);
1654 	case IFCOUNTER_IBYTES:
1655 		return (vsi->ibytes);
1656 	case IFCOUNTER_OBYTES:
1657 		return (vsi->obytes);
1658 	case IFCOUNTER_IMCASTS:
1659 		return (vsi->imcasts);
1660 	case IFCOUNTER_OMCASTS:
1661 		return (vsi->omcasts);
1662 	case IFCOUNTER_IQDROPS:
1663 		return (vsi->iqdrops);
1664 	case IFCOUNTER_OQDROPS:
1665 		return (vsi->oqdrops);
1666 	case IFCOUNTER_NOPROTO:
1667 		return (vsi->noproto);
1668 	default:
1669 		return (if_get_counter_default(ifp, cnt));
1670 	}
1671 }
1672 #endif
1673 
1674