xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision fcb560670601b2a4d87bb31d7531c8dcc37ee71b)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 **	    This was separated to allow usage by
38 ** 	    both the BASE and the VF drivers.
39 */
40 
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "opt_rss.h"
44 #include "ixl.h"
45 
46 #ifdef RSS
47 #include <net/rss_config.h>
48 #endif
49 
50 /* Local Prototypes */
51 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
52 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
53 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
54 static int	ixl_tx_setup_offload(struct ixl_queue *,
55 		    struct mbuf *, u32 *, u32 *);
56 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
57 
58 static __inline void ixl_rx_discard(struct rx_ring *, int);
59 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
60 		    struct mbuf *, u8);
61 
62 /*
63 ** Multiqueue Transmit driver
64 **
65 */
66 int
67 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
68 {
69 	struct ixl_vsi		*vsi = ifp->if_softc;
70 	struct ixl_queue	*que;
71 	struct tx_ring		*txr;
72 	int 			err, i;
73 #ifdef RSS
74 	u32			bucket_id;
75 #endif
76 
77 	/*
78 	** Which queue to use:
79 	**
80 	** When doing RSS, map it to the same outbound
81 	** queue as the incoming flow would be mapped to.
82 	** If everything is set up correctly, it should be
83 	** the same bucket that the current CPU is mapped to.
84 	*/
85 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
86 #ifdef  RSS
87 		if (rss_hash2bucket(m->m_pkthdr.flowid,
88 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
89 			i = bucket_id % vsi->num_queues;
90                 } else
91 #endif
92                         i = m->m_pkthdr.flowid % vsi->num_queues;
93         } else
94 		i = curcpu % vsi->num_queues;
95 	/*
96 	** This may not be perfect, but until something
97 	** better comes along it will keep us from scheduling
98 	** on stalled queues.
99 	*/
100 	if (((1 << i) & vsi->active_queues) == 0 && vsi->active_queues != 0)
101 		i = ffsl(vsi->active_queues) - 1;	/* ffsl() is 1-based */
102 
103 	que = &vsi->queues[i];
104 	txr = &que->txr;
105 
106 	err = drbr_enqueue(ifp, txr->br, m);
107 	if (err)
108 		return(err);
109 	if (IXL_TX_TRYLOCK(txr)) {
110 		ixl_mq_start_locked(ifp, txr);
111 		IXL_TX_UNLOCK(txr);
112 	} else
113 		taskqueue_enqueue(que->tq, &que->tx_task);
114 
115 	return (0);
116 }
117 
118 int
119 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
120 {
121 	struct ixl_queue	*que = txr->que;
122 	struct ixl_vsi		*vsi = que->vsi;
123         struct mbuf		*next;
124         int			err = 0;
125 
126 
127 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
128 	    vsi->link_active == 0)
129 		return (ENETDOWN);
130 
131 	/* Process the transmit queue */
132 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
133 		if ((err = ixl_xmit(que, &next)) != 0) {
134 			if (next == NULL)
135 				drbr_advance(ifp, txr->br);
136 			else
137 				drbr_putback(ifp, txr->br, next);
138 			break;
139 		}
140 		drbr_advance(ifp, txr->br);
141 		/* Send a copy of the frame to the BPF listener */
142 		ETHER_BPF_MTAP(ifp, next);
143 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
144 			break;
145 	}
146 
147 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
148 		ixl_txeof(que);
149 
150 	return (err);
151 }
152 
153 /*
154  * Called from a taskqueue to drain queued transmit packets.
155  */
156 void
157 ixl_deferred_mq_start(void *arg, int pending)
158 {
159 	struct ixl_queue	*que = arg;
160         struct tx_ring		*txr = &que->txr;
161 	struct ixl_vsi		*vsi = que->vsi;
162         struct ifnet		*ifp = vsi->ifp;
163 
164 	IXL_TX_LOCK(txr);
165 	if (!drbr_empty(ifp, txr->br))
166 		ixl_mq_start_locked(ifp, txr);
167 	IXL_TX_UNLOCK(txr);
168 }
169 
170 /*
171 ** Flush all queue ring buffers
172 */
173 void
174 ixl_qflush(struct ifnet *ifp)
175 {
176 	struct ixl_vsi	*vsi = ifp->if_softc;
177 
178         for (int i = 0; i < vsi->num_queues; i++) {
179 		struct ixl_queue *que = &vsi->queues[i];
180 		struct tx_ring	*txr = &que->txr;
181 		struct mbuf	*m;
182 		IXL_TX_LOCK(txr);
183 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
184 			m_freem(m);
185 		IXL_TX_UNLOCK(txr);
186 	}
187 	if_qflush(ifp);
188 }
189 
190 /*
191 ** Find mbuf chains passed to the driver
192 ** that are 'sparse', using more than IXL_SPARSE_CHAIN
193 ** mbufs to deliver an MSS-sized chunk of data
194 */
195 static inline bool
196 ixl_tso_detect_sparse(struct mbuf *mp)
197 {
198 	struct mbuf	*m;
199 	int		num = 0, mss;
200 	bool		ret = FALSE;
201 
202 	mss = mp->m_pkthdr.tso_segsz;
203 	for (m = mp->m_next; m != NULL; m = m->m_next) {
204 		num++;
205 		mss -= m->m_len;
206 		if (mss < 1)
207 			break;
208 		if (m->m_next == NULL)
209 			break;
210 	}
211 	if (num > IXL_SPARSE_CHAIN)
212 		ret = TRUE;
213 
214 	return (ret);
215 }
216 
217 
218 /*********************************************************************
219  *
220  *  This routine maps the mbufs to tx descriptors, allowing the
221  *  TX engine to transmit the packets.
222  *  	- return 0 on success, positive on failure
223  *
224  **********************************************************************/
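/*
 * Command bits applied to the final data descriptor of each packet:
 * EOP marks the End Of Packet; RS asks the hardware to report
 * completion status for that descriptor.
 */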
225 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
226 
227 static int
228 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
229 {
230 	struct ixl_vsi		*vsi = que->vsi;
231 	struct i40e_hw		*hw = vsi->hw;
232 	struct tx_ring		*txr = &que->txr;
233 	struct ixl_tx_buf	*buf;
234 	struct i40e_tx_desc	*txd = NULL;
235 	struct mbuf		*m_head, *m;
236 	int             	i, j, error, nsegs, maxsegs;
237 	int			first, last = 0;
238 	u16			vtag = 0;
239 	u32			cmd, off;
240 	bus_dmamap_t		map;
241 	bus_dma_tag_t		tag;
242 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
243 
244 
245 	cmd = off = 0;
246 	m_head = *m_headp;
247 
248         /*
249          * Capture the first descriptor used; its buffer
250          * entry will record the index of the last one,
251          * which we tell the hardware to report back on
252          */
253         first = txr->next_avail;
254 	buf = &txr->buffers[first];
255 	map = buf->map;
256 	tag = txr->tx_tag;
257 	maxsegs = IXL_MAX_TX_SEGS;
258 
259 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
260 		/* Use larger mapping for TSO */
261 		tag = txr->tso_tag;
262 		maxsegs = IXL_MAX_TSO_SEGS;
263 		if (ixl_tso_detect_sparse(m_head)) {
264 			m = m_defrag(m_head, M_NOWAIT);
265 			if (m == NULL) {
266 				m_freem(*m_headp);
267 				*m_headp = NULL;
268 				return (ENOBUFS);
269 			}
270 			*m_headp = m;
271 		}
272 	}
273 
274 	/*
275 	 * Map the packet for DMA.
276 	 */
277 	error = bus_dmamap_load_mbuf_sg(tag, map,
278 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
279 
280 	if (error == EFBIG) {
281 		struct mbuf *m;
282 
283 		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
284 		if (m == NULL) {
285 			que->mbuf_defrag_failed++;
286 			m_freem(*m_headp);
287 			*m_headp = NULL;
288 			return (ENOBUFS);
289 		}
290 		*m_headp = m;
291 
292 		/* Try it again */
293 		error = bus_dmamap_load_mbuf_sg(tag, map,
294 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
295 
296 		if (error == ENOMEM) {
297 			que->tx_dma_setup++;
298 			return (error);
299 		} else if (error != 0) {
300 			que->tx_dma_setup++;
301 			m_freem(*m_headp);
302 			*m_headp = NULL;
303 			return (error);
304 		}
305 	} else if (error == ENOMEM) {
306 		que->tx_dma_setup++;
307 		return (error);
308 	} else if (error != 0) {
309 		que->tx_dma_setup++;
310 		m_freem(*m_headp);
311 		*m_headp = NULL;
312 		return (error);
313 	}
314 
315 	/* Make certain there are enough descriptors */
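	/* (the extra slack presumably leaves room for a TSO context
	 *  descriptor, which is added after this check and is not
	 *  counted in nsegs) */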
316 	if (nsegs > txr->avail - 2) {
317 		txr->no_desc++;
318 		error = ENOBUFS;
319 		goto xmit_fail;
320 	}
321 	m_head = *m_headp;
322 
323 	/* Set up the TSO/CSUM offload */
324 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
325 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
326 		if (error)
327 			goto xmit_fail;
328 	}
329 
330 	cmd |= I40E_TX_DESC_CMD_ICRC;
331 	/* Grab the VLAN tag */
332 	if (m_head->m_flags & M_VLANTAG) {
333 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
334 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
335 	}
336 
337 	i = txr->next_avail;
338 	for (j = 0; j < nsegs; j++) {
339 		bus_size_t seglen;
340 
341 		buf = &txr->buffers[i];
342 		buf->tag = tag; /* Keep track of the type tag */
343 		txd = &txr->base[i];
344 		seglen = segs[j].ds_len;
345 
346 		txd->buffer_addr = htole64(segs[j].ds_addr);
347 		txd->cmd_type_offset_bsz =
348 		    htole64(I40E_TX_DESC_DTYPE_DATA
349 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
350 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
351 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
352 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
353 
354 		last = i; /* descriptor that will get completion IRQ */
355 
356 		if (++i == que->num_desc)
357 			i = 0;
358 
359 		buf->m_head = NULL;
360 		buf->eop_index = -1;
361 	}
362 	/* Set the last descriptor for report */
363 	txd->cmd_type_offset_bsz |=
364 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
365 	txr->avail -= nsegs;
366 	txr->next_avail = i;
367 
368 	buf->m_head = m_head;
369 	/* Swap the dma map between the first and last descriptor */
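	/* The mbuf was loaded using the first buffer's map, but the mbuf
	 * pointer is stored on the last buffer of the packet (freed when
	 * the last descriptor is cleaned); swapping keeps the mbuf paired
	 * with the map it was loaded into so txeof can unload and free it. */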
370 	txr->buffers[first].map = buf->map;
371 	buf->map = map;
372 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
373 
374         /* Set the index of the descriptor that will be marked done */
375         buf = &txr->buffers[first];
376 	buf->eop_index = last;
377 
378         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
379             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
380 	/*
381 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
382 	 * hardware that this frame is available to transmit.
383 	 */
384 	++txr->total_packets;
385 	wr32(hw, txr->tail, i);
386 
387 	ixl_flush(hw);
388 	/* Mark outstanding work */
389 	if (que->busy == 0)
390 		que->busy = 1;
391 	return (0);
392 
393 xmit_fail:
394 	bus_dmamap_unload(tag, buf->map);
395 	return (error);
396 }
397 
398 
399 /*********************************************************************
400  *
401  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
402  *  the information needed to transmit a packet on the wire. This is
403  *  called only once at attach; setup is done on every reset.
404  *
405  **********************************************************************/
406 int
407 ixl_allocate_tx_data(struct ixl_queue *que)
408 {
409 	struct tx_ring		*txr = &que->txr;
410 	struct ixl_vsi		*vsi = que->vsi;
411 	device_t		dev = vsi->dev;
412 	struct ixl_tx_buf	*buf;
413 	int			error = 0;
414 
415 	/*
416 	 * Setup DMA descriptor areas.
417 	 */
418 	if ((error = bus_dma_tag_create(NULL,		/* parent */
419 			       1, 0,			/* alignment, bounds */
420 			       BUS_SPACE_MAXADDR,	/* lowaddr */
421 			       BUS_SPACE_MAXADDR,	/* highaddr */
422 			       NULL, NULL,		/* filter, filterarg */
423 			       IXL_TSO_SIZE,		/* maxsize */
424 			       IXL_MAX_TX_SEGS,		/* nsegments */
425 			       PAGE_SIZE,		/* maxsegsize */
426 			       0,			/* flags */
427 			       NULL,			/* lockfunc */
428 			       NULL,			/* lockfuncarg */
429 			       &txr->tx_tag))) {
430 		device_printf(dev,"Unable to allocate TX DMA tag\n");
431 		goto fail;
432 	}
433 
434 	/* Make a special tag for TSO */
435 	if ((error = bus_dma_tag_create(NULL,		/* parent */
436 			       1, 0,			/* alignment, bounds */
437 			       BUS_SPACE_MAXADDR,	/* lowaddr */
438 			       BUS_SPACE_MAXADDR,	/* highaddr */
439 			       NULL, NULL,		/* filter, filterarg */
440 			       IXL_TSO_SIZE,		/* maxsize */
441 			       IXL_MAX_TSO_SEGS,	/* nsegments */
442 			       PAGE_SIZE,		/* maxsegsize */
443 			       0,			/* flags */
444 			       NULL,			/* lockfunc */
445 			       NULL,			/* lockfuncarg */
446 			       &txr->tso_tag))) {
447 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
448 		goto fail;
449 	}
450 
451 	if (!(txr->buffers =
452 	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
453 	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
454 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
455 		error = ENOMEM;
456 		goto fail;
457 	}
458 
459         /* Create the descriptor buffer default dma maps */
460 	buf = txr->buffers;
461 	for (int i = 0; i < que->num_desc; i++, buf++) {
462 		buf->tag = txr->tx_tag;
463 		error = bus_dmamap_create(buf->tag, 0, &buf->map);
464 		if (error != 0) {
465 			device_printf(dev, "Unable to create TX DMA map\n");
466 			goto fail;
467 		}
468 	}
469 fail:
470 	return (error);
471 }
472 
473 
474 /*********************************************************************
475  *
476  *  (Re)Initialize a queue transmit ring.
477  *	- called by init, it clears the descriptor ring,
478  *	  and frees any stale mbufs
479  *
480  **********************************************************************/
481 void
482 ixl_init_tx_ring(struct ixl_queue *que)
483 {
484 	struct tx_ring *txr = &que->txr;
485 	struct ixl_tx_buf *buf;
486 
487 	/* Clear the old ring contents */
488 	IXL_TX_LOCK(txr);
489 	bzero((void *)txr->base,
490 	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
491 
492 	/* Reset indices */
493 	txr->next_avail = 0;
494 	txr->next_to_clean = 0;
495 
496 #ifdef IXL_FDIR
497 	/* Initialize flow director */
498 	txr->atr_rate = ixl_atr_rate;
499 	txr->atr_count = 0;
500 #endif
501 
502 	/* Free any existing tx mbufs. */
503         buf = txr->buffers;
504 	for (int i = 0; i < que->num_desc; i++, buf++) {
505 		if (buf->m_head != NULL) {
506 			bus_dmamap_sync(buf->tag, buf->map,
507 			    BUS_DMASYNC_POSTWRITE);
508 			bus_dmamap_unload(buf->tag, buf->map);
509 			m_freem(buf->m_head);
510 			buf->m_head = NULL;
511 		}
512 		/* Clear the EOP index */
513 		buf->eop_index = -1;
514         }
515 
516 	/* Set number of descriptors available */
517 	txr->avail = que->num_desc;
518 
519 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
520 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
521 	IXL_TX_UNLOCK(txr);
522 }
523 
524 
525 /*********************************************************************
526  *
527  *  Free transmit ring related data structures.
528  *
529  **********************************************************************/
530 void
531 ixl_free_que_tx(struct ixl_queue *que)
532 {
533 	struct tx_ring *txr = &que->txr;
534 	struct ixl_tx_buf *buf;
535 
536 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
537 
538 	for (int i = 0; i < que->num_desc; i++) {
539 		buf = &txr->buffers[i];
540 		if (buf->m_head != NULL) {
541 			bus_dmamap_sync(buf->tag, buf->map,
542 			    BUS_DMASYNC_POSTWRITE);
543 			bus_dmamap_unload(buf->tag,
544 			    buf->map);
545 			m_freem(buf->m_head);
546 			buf->m_head = NULL;
547 			if (buf->map != NULL) {
548 				bus_dmamap_destroy(buf->tag,
549 				    buf->map);
550 				buf->map = NULL;
551 			}
552 		} else if (buf->map != NULL) {
553 			bus_dmamap_unload(buf->tag,
554 			    buf->map);
555 			bus_dmamap_destroy(buf->tag,
556 			    buf->map);
557 			buf->map = NULL;
558 		}
559 	}
560 	if (txr->br != NULL)
561 		buf_ring_free(txr->br, M_DEVBUF);
562 	if (txr->buffers != NULL) {
563 		free(txr->buffers, M_DEVBUF);
564 		txr->buffers = NULL;
565 	}
566 	if (txr->tx_tag != NULL) {
567 		bus_dma_tag_destroy(txr->tx_tag);
568 		txr->tx_tag = NULL;
569 	}
570 	if (txr->tso_tag != NULL) {
571 		bus_dma_tag_destroy(txr->tso_tag);
572 		txr->tso_tag = NULL;
573 	}
574 
575 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
576 	return;
577 }
578 
579 /*********************************************************************
580  *
581  *  Setup descriptor for hw offloads
582  *
583  **********************************************************************/
584 
585 static int
586 ixl_tx_setup_offload(struct ixl_queue *que,
587     struct mbuf *mp, u32 *cmd, u32 *off)
588 {
589 	struct ether_vlan_header	*eh;
590 #ifdef INET
591 	struct ip			*ip = NULL;
592 #endif
593 	struct tcphdr			*th = NULL;
594 #ifdef INET6
595 	struct ip6_hdr			*ip6;
596 #endif
597 	int				elen, ip_hlen = 0, tcp_hlen;
598 	u16				etype;
599 	u8				ipproto = 0;
600 	bool				tso = FALSE;
601 
602 
603 	/* Set up the TSO context descriptor if required */
604 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
605 		tso = ixl_tso_setup(que, mp);
606 		if (tso)
607 			++que->tso;
608 		else
609 			return (ENXIO);
610 	}
611 
612 	/*
613 	 * Determine where frame payload starts.
614 	 * Jump over vlan headers if already present,
615 	 * helpful for QinQ too.
616 	 */
617 	eh = mtod(mp, struct ether_vlan_header *);
618 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
619 		etype = ntohs(eh->evl_proto);
620 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
621 	} else {
622 		etype = ntohs(eh->evl_encap_proto);
623 		elen = ETHER_HDR_LEN;
624 	}
625 
626 	switch (etype) {
627 #ifdef INET
628 		case ETHERTYPE_IP:
629 			ip = (struct ip *)(mp->m_data + elen);
630 			ip_hlen = ip->ip_hl << 2;
631 			ipproto = ip->ip_p;
632 			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
633 			/* The IP checksum must be recalculated with TSO */
634 			if (tso)
635 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
636 			else
637 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
638 			break;
639 #endif
640 #ifdef INET6
641 		case ETHERTYPE_IPV6:
642 			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
643 			ip_hlen = sizeof(struct ip6_hdr);
644 			ipproto = ip6->ip6_nxt;
645 			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
646 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
647 			break;
648 #endif
649 		default:
650 			break;
651 	}
652 
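	/* MACLEN is programmed in 2-byte words and IPLEN in 4-byte words,
	 * hence the elen >> 1 and ip_hlen >> 2; the L4 header lengths set
	 * below are likewise in 4-byte words. */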
653 	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
654 	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
655 
656 	switch (ipproto) {
657 		case IPPROTO_TCP:
658 			tcp_hlen = th->th_off << 2;
659 			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
660 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
661 				*off |= (tcp_hlen >> 2) <<
662 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
663 			}
664 #ifdef IXL_FDIR
665 			ixl_atr(que, th, etype);
666 #endif
667 			break;
668 		case IPPROTO_UDP:
669 			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
670 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
671 				*off |= (sizeof(struct udphdr) >> 2) <<
672 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
673 			}
674 			break;
675 
676 		case IPPROTO_SCTP:
677 			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
678 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
679 				*off |= (sizeof(struct sctphdr) >> 2) <<
680 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
681 			}
682 			/* Fall Thru */
683 		default:
684 			break;
685 	}
686 
687         return (0);
688 }
689 
690 
691 /**********************************************************************
692  *
693  *  Setup context for hardware segmentation offload (TSO)
694  *
695  **********************************************************************/
696 static bool
697 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
698 {
699 	struct tx_ring			*txr = &que->txr;
700 	struct i40e_tx_context_desc	*TXD;
701 	struct ixl_tx_buf		*buf;
702 	u32				cmd, mss, type, tsolen;
703 	u16				etype;
704 	int				idx, elen, ip_hlen, tcp_hlen;
705 	struct ether_vlan_header	*eh;
706 #ifdef INET
707 	struct ip			*ip;
708 #endif
709 #ifdef INET6
710 	struct ip6_hdr			*ip6;
711 #endif
712 #if defined(INET6) || defined(INET)
713 	struct tcphdr			*th;
714 #endif
715 	u64				type_cmd_tso_mss;
716 
717 	/*
718 	 * Determine where frame payload starts.
719 	 * Jump over vlan headers if already present
720 	 */
721 	eh = mtod(mp, struct ether_vlan_header *);
722 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
723 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
724 		etype = eh->evl_proto;
725 	} else {
726 		elen = ETHER_HDR_LEN;
727 		etype = eh->evl_encap_proto;
728 	}
729 
730         switch (ntohs(etype)) {
731 #ifdef INET6
732 	case ETHERTYPE_IPV6:
733 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
734 		if (ip6->ip6_nxt != IPPROTO_TCP)
735 			return FALSE;
736 		ip_hlen = sizeof(struct ip6_hdr);
737 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
738 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
739 		tcp_hlen = th->th_off << 2;
740 		break;
741 #endif
742 #ifdef INET
743 	case ETHERTYPE_IP:
744 		ip = (struct ip *)(mp->m_data + elen);
745 		if (ip->ip_p != IPPROTO_TCP)
746 			return FALSE;
747 		ip->ip_sum = 0;
748 		ip_hlen = ip->ip_hl << 2;
749 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
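		/* Seed the TCP checksum with the pseudo-header sum (without
		 * the length field), as expected for hardware TSO. */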
750 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
751 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
752 		tcp_hlen = th->th_off << 2;
753 		break;
754 #endif
755 	default:
756 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
757 		    __func__, ntohs(etype));
758 		return FALSE;
759         }
760 
761         /* Ensure we have at least the IP+TCP header in the first mbuf. */
762         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
763 		return FALSE;
764 
765 	idx = txr->next_avail;
766 	buf = &txr->buffers[idx];
767 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
768 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
769 
770 	type = I40E_TX_DESC_DTYPE_CONTEXT;
771 	cmd = I40E_TX_CTX_DESC_TSO;
772 	mss = mp->m_pkthdr.tso_segsz;
773 
774 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
775 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
776 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
777 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
778 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
779 
780 	TXD->tunneling_params = htole32(0);
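	/* The context descriptor consumes a ring slot but carries no mbuf,
	 * so its buffer entry is left empty. */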
781 	buf->m_head = NULL;
782 	buf->eop_index = -1;
783 
784 	if (++idx == que->num_desc)
785 		idx = 0;
786 
787 	txr->avail--;
788 	txr->next_avail = idx;
789 
790 	return TRUE;
791 }
792 
793 /*
794 ** ixl_get_tx_head - Retrieve the value from the
795 **    location the HW records its HEAD index
796 */
797 static inline u32
798 ixl_get_tx_head(struct ixl_queue *que)
799 {
800 	struct tx_ring  *txr = &que->txr;
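	/* With head write-back, the hardware stores its HEAD index in the
	 * memory just past the last descriptor, i.e. at base[num_desc]. */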
801 	void *head = &txr->base[que->num_desc];
802 	return LE32_TO_CPU(*(volatile __le32 *)head);
803 }
804 
805 /**********************************************************************
806  *
807  *  Examine each tx_buffer in the used queue. If the hardware is done
808  *  processing the packet then free associated resources. The
809  *  tx_buffer is put back on the free queue.
810  *
811  **********************************************************************/
812 bool
813 ixl_txeof(struct ixl_queue *que)
814 {
815 	struct tx_ring		*txr = &que->txr;
816 	u32			first, last, head, done, processed;
817 	struct ixl_tx_buf	*buf;
818 	struct i40e_tx_desc	*tx_desc, *eop_desc;
819 
820 
821 	mtx_assert(&txr->mtx, MA_OWNED);
822 
823 
824 	/* These are not the descriptors you seek, move along :) */
825 	if (txr->avail == que->num_desc) {
826 		que->busy = 0;
827 		return FALSE;
828 	}
829 
830 	processed = 0;
831 	first = txr->next_to_clean;
832 	buf = &txr->buffers[first];
833 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
834 	last = buf->eop_index;
835 	if (last == -1)
836 		return FALSE;
837 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
838 
839 	/* Get the Head WB value */
840 	head = ixl_get_tx_head(que);
841 
842 	/*
843 	** Get the index of the first descriptor
844 	** BEYOND the EOP and call that 'done'.
845 	** I do this so the comparison in the
846 	** inner while loop below can be simple
847 	*/
848 	if (++last == que->num_desc) last = 0;
849 	done = last;
850 
851         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
852             BUS_DMASYNC_POSTREAD);
853 	/*
854 	** The HEAD index of the ring is written to a
855 	** defined location; this, rather than a done bit,
856 	** is what is used to keep track of what must be
857 	** 'cleaned'.
858 	*/
859 	while (first != head) {
860 		/* We clean the range of the packet */
861 		while (first != done) {
862 			++txr->avail;
863 			++processed;
864 
865 			if (buf->m_head) {
866 				txr->bytes += /* for ITR adjustment */
867 				    buf->m_head->m_pkthdr.len;
868 				txr->tx_bytes += /* for TX stats */
869 				    buf->m_head->m_pkthdr.len;
870 				bus_dmamap_sync(buf->tag,
871 				    buf->map,
872 				    BUS_DMASYNC_POSTWRITE);
873 				bus_dmamap_unload(buf->tag,
874 				    buf->map);
875 				m_freem(buf->m_head);
876 				buf->m_head = NULL;
877 				buf->map = NULL;
878 			}
879 			buf->eop_index = -1;
880 
881 			if (++first == que->num_desc)
882 				first = 0;
883 
884 			buf = &txr->buffers[first];
885 			tx_desc = &txr->base[first];
886 		}
887 		++txr->packets;
888 		/* See if there is more work now */
889 		last = buf->eop_index;
890 		if (last != -1) {
891 			eop_desc = &txr->base[last];
892 			/* Get next done point */
893 			if (++last == que->num_desc) last = 0;
894 			done = last;
895 		} else
896 			break;
897 	}
898 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
899 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
900 
901 	txr->next_to_clean = first;
902 
903 
904 	/*
905 	** Hang detection: we know there's work
906 	** outstanding or the first return above would
907 	** have been taken, so indicate an unsuccessful
908 	** pass; in the local timer, if the value grows
909 	** too great the queue will be considered hung.
910 	** If anything has been cleaned then reset
911 	** the state.
912 	*/
913 	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
914 		++que->busy;
915 
916 	if (processed)
917 		que->busy = 1; /* Note this turns off HUNG */
918 
919 	/*
920 	 * If there are no pending descriptors, clear the timeout.
921 	 */
922 	if (txr->avail == que->num_desc) {
923 		que->busy = 0;
924 		return FALSE;
925 	}
926 
927 	return TRUE;
928 }
929 
930 /*********************************************************************
931  *
932  *  Refresh mbuf buffers for RX descriptor rings
933  *   - now keeps its own state, so discards due to resource
934  *     exhaustion are unnecessary; if an mbuf cannot be obtained
935  *     it just returns, keeping its placeholder, and can simply
936  *     be called again to retry.
937  *
938  **********************************************************************/
939 static void
940 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
941 {
942 	struct ixl_vsi		*vsi = que->vsi;
943 	struct rx_ring		*rxr = &que->rxr;
944 	bus_dma_segment_t	hseg[1];
945 	bus_dma_segment_t	pseg[1];
946 	struct ixl_rx_buf	*buf;
947 	struct mbuf		*mh, *mp;
948 	int			i, j, nsegs, error;
949 	bool			refreshed = FALSE;
950 
951 	i = j = rxr->next_refresh;
952 	/* Control the loop with one beyond */
953 	if (++j == que->num_desc)
954 		j = 0;
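	/* j runs one descriptor ahead of i; the loop stops when j reaches
	 * 'limit', so refreshing always stays one entry short of the
	 * descriptor the driver will examine next. */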
955 
956 	while (j != limit) {
957 		buf = &rxr->buffers[i];
958 		if (rxr->hdr_split == FALSE)
959 			goto no_split;
960 
961 		if (buf->m_head == NULL) {
962 			mh = m_gethdr(M_NOWAIT, MT_DATA);
963 			if (mh == NULL)
964 				goto update;
965 		} else
966 			mh = buf->m_head;
967 
968 		mh->m_pkthdr.len = mh->m_len = MHLEN;
970 		mh->m_flags |= M_PKTHDR;
971 		/* Get the memory mapping */
972 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
973 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
974 		if (error != 0) {
975 			printf("Refresh mbufs: hdr dmamap load"
976 			    " failure - %d\n", error);
977 			m_free(mh);
978 			buf->m_head = NULL;
979 			goto update;
980 		}
981 		buf->m_head = mh;
982 		bus_dmamap_sync(rxr->htag, buf->hmap,
983 		    BUS_DMASYNC_PREREAD);
984 		rxr->base[i].read.hdr_addr =
985 		   htole64(hseg[0].ds_addr);
986 
987 no_split:
988 		if (buf->m_pack == NULL) {
989 			mp = m_getjcl(M_NOWAIT, MT_DATA,
990 			    M_PKTHDR, rxr->mbuf_sz);
991 			if (mp == NULL)
992 				goto update;
993 		} else
994 			mp = buf->m_pack;
995 
996 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
997 		/* Get the memory mapping */
998 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
999 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1000 		if (error != 0) {
1001 			printf("Refresh mbufs: payload dmamap load"
1002 			    " failure - %d\n", error);
1003 			m_free(mp);
1004 			buf->m_pack = NULL;
1005 			goto update;
1006 		}
1007 		buf->m_pack = mp;
1008 		bus_dmamap_sync(rxr->ptag, buf->pmap,
1009 		    BUS_DMASYNC_PREREAD);
1010 		rxr->base[i].read.pkt_addr =
1011 		   htole64(pseg[0].ds_addr);
1012 		/* Used only when doing header split */
1013 		rxr->base[i].read.hdr_addr = 0;
1014 
1015 		refreshed = TRUE;
1016 		/* Next is precalculated */
1017 		i = j;
1018 		rxr->next_refresh = i;
1019 		if (++j == que->num_desc)
1020 			j = 0;
1021 	}
1022 update:
1023 	if (refreshed) /* Update hardware tail index */
1024 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1025 	return;
1026 }
1027 
1028 
1029 /*********************************************************************
1030  *
1031  *  Allocate memory for rx_buffer structures. Since we use one
1032  *  rx_buffer per descriptor, the maximum number of rx_buffers
1033  *  that we'll need is equal to the number of receive descriptors
1034  *  that we've defined.
1035  *
1036  **********************************************************************/
1037 int
1038 ixl_allocate_rx_data(struct ixl_queue *que)
1039 {
1040 	struct rx_ring		*rxr = &que->rxr;
1041 	struct ixl_vsi		*vsi = que->vsi;
1042 	device_t 		dev = vsi->dev;
1043 	struct ixl_rx_buf 	*buf;
1044 	int             	i, bsize, error;
1045 
1046 	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1047 	if (!(rxr->buffers =
1048 	    (struct ixl_rx_buf *) malloc(bsize,
1049 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1050 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1051 		error = ENOMEM;
1052 		return (error);
1053 	}
1054 
1055 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1056 				   1, 0,	/* alignment, bounds */
1057 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1058 				   BUS_SPACE_MAXADDR,	/* highaddr */
1059 				   NULL, NULL,		/* filter, filterarg */
1060 				   MSIZE,		/* maxsize */
1061 				   1,			/* nsegments */
1062 				   MSIZE,		/* maxsegsize */
1063 				   0,			/* flags */
1064 				   NULL,		/* lockfunc */
1065 				   NULL,		/* lockfuncarg */
1066 				   &rxr->htag))) {
1067 		device_printf(dev, "Unable to create RX DMA htag\n");
1068 		return (error);
1069 	}
1070 
1071 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1072 				   1, 0,	/* alignment, bounds */
1073 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1074 				   BUS_SPACE_MAXADDR,	/* highaddr */
1075 				   NULL, NULL,		/* filter, filterarg */
1076 				   MJUM16BYTES,		/* maxsize */
1077 				   1,			/* nsegments */
1078 				   MJUM16BYTES,		/* maxsegsize */
1079 				   0,			/* flags */
1080 				   NULL,		/* lockfunc */
1081 				   NULL,		/* lockfuncarg */
1082 				   &rxr->ptag))) {
1083 		device_printf(dev, "Unable to create RX DMA ptag\n");
1084 		return (error);
1085 	}
1086 
1087 	for (i = 0; i < que->num_desc; i++) {
1088 		buf = &rxr->buffers[i];
1089 		error = bus_dmamap_create(rxr->htag,
1090 		    BUS_DMA_NOWAIT, &buf->hmap);
1091 		if (error) {
1092 			device_printf(dev, "Unable to create RX head map\n");
1093 			break;
1094 		}
1095 		error = bus_dmamap_create(rxr->ptag,
1096 		    BUS_DMA_NOWAIT, &buf->pmap);
1097 		if (error) {
1098 			device_printf(dev, "Unable to create RX pkt map\n");
1099 			break;
1100 		}
1101 	}
1102 
1103 	return (error);
1104 }
1105 
1106 
1107 /*********************************************************************
1108  *
1109  *  (Re)Initialize the queue receive ring and its buffers.
1110  *
1111  **********************************************************************/
1112 int
1113 ixl_init_rx_ring(struct ixl_queue *que)
1114 {
1115 	struct	rx_ring 	*rxr = &que->rxr;
1116 	struct ixl_vsi		*vsi = que->vsi;
1117 #if defined(INET6) || defined(INET)
1118 	struct ifnet		*ifp = vsi->ifp;
1119 	struct lro_ctrl		*lro = &rxr->lro;
1120 #endif
1121 	struct ixl_rx_buf	*buf;
1122 	bus_dma_segment_t	pseg[1], hseg[1];
1123 	int			rsize, nsegs, error = 0;
1124 
1125 	IXL_RX_LOCK(rxr);
1126 	/* Clear the ring contents */
1127 	rsize = roundup2(que->num_desc *
1128 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1129 	bzero((void *)rxr->base, rsize);
1130 	/* Cleanup any existing buffers */
1131 	for (int i = 0; i < que->num_desc; i++) {
1132 		buf = &rxr->buffers[i];
1133 		if (buf->m_head != NULL) {
1134 			bus_dmamap_sync(rxr->htag, buf->hmap,
1135 			    BUS_DMASYNC_POSTREAD);
1136 			bus_dmamap_unload(rxr->htag, buf->hmap);
1137 			buf->m_head->m_flags |= M_PKTHDR;
1138 			m_freem(buf->m_head);
1139 		}
1140 		if (buf->m_pack != NULL) {
1141 			bus_dmamap_sync(rxr->ptag, buf->pmap,
1142 			    BUS_DMASYNC_POSTREAD);
1143 			bus_dmamap_unload(rxr->ptag, buf->pmap);
1144 			buf->m_pack->m_flags |= M_PKTHDR;
1145 			m_freem(buf->m_pack);
1146 		}
1147 		buf->m_head = NULL;
1148 		buf->m_pack = NULL;
1149 	}
1150 
1151 	/* header split is off */
1152 	rxr->hdr_split = FALSE;
1153 
1154 	/* Now replenish the mbufs */
1155 	for (int j = 0; j != que->num_desc; ++j) {
1156 		struct mbuf	*mh, *mp;
1157 
1158 		buf = &rxr->buffers[j];
1159 		/*
1160 		** Don't allocate mbufs if not
1161 		** doing header split; it's wasteful
1162 		*/
1163 		if (rxr->hdr_split == FALSE)
1164 			goto skip_head;
1165 
1166 		/* First the header */
1167 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1168 		if (buf->m_head == NULL) {
1169 			error = ENOBUFS;
1170 			goto fail;
1171 		}
1172 		m_adj(buf->m_head, ETHER_ALIGN);
1173 		mh = buf->m_head;
1174 		mh->m_len = mh->m_pkthdr.len = MHLEN;
1175 		mh->m_flags |= M_PKTHDR;
1176 		/* Get the memory mapping */
1177 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1178 		    buf->hmap, buf->m_head, hseg,
1179 		    &nsegs, BUS_DMA_NOWAIT);
1180 		if (error != 0) /* Nothing elegant to do here */
1181 			goto fail;
1182 		bus_dmamap_sync(rxr->htag,
1183 		    buf->hmap, BUS_DMASYNC_PREREAD);
1184 		/* Update descriptor */
1185 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1186 
1187 skip_head:
1188 		/* Now the payload cluster */
1189 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1190 		    M_PKTHDR, rxr->mbuf_sz);
1191 		if (buf->m_pack == NULL) {
1192 			error = ENOBUFS;
1193                         goto fail;
1194 		}
1195 		mp = buf->m_pack;
1196 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1197 		/* Get the memory mapping */
1198 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1199 		    buf->pmap, mp, pseg,
1200 		    &nsegs, BUS_DMA_NOWAIT);
1201 		if (error != 0)
1202                         goto fail;
1203 		bus_dmamap_sync(rxr->ptag,
1204 		    buf->pmap, BUS_DMASYNC_PREREAD);
1205 		/* Update descriptor */
1206 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1207 		rxr->base[j].read.hdr_addr = 0;
1208 	}
1209 
1210 
1211 	/* Setup our descriptor indices */
1212 	rxr->next_check = 0;
1213 	rxr->next_refresh = 0;
1214 	rxr->lro_enabled = FALSE;
1215 	rxr->split = 0;
1216 	rxr->bytes = 0;
1217 	rxr->discard = FALSE;
1218 
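	/* Hand the ring to the hardware by pointing the tail at the last
	 * descriptor index. */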
1219 	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1220 	ixl_flush(vsi->hw);
1221 
1222 #if defined(INET6) || defined(INET)
1223 	/*
1224 	** Now set up the LRO interface:
1225 	*/
1226 	if (ifp->if_capenable & IFCAP_LRO) {
1227 		int err = tcp_lro_init(lro);
1228 		if (err) {
1229 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1230 			goto fail;
1231 		}
1232 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1233 		rxr->lro_enabled = TRUE;
1234 		lro->ifp = vsi->ifp;
1235 	}
1236 #endif
1237 
1238 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1239 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1240 
1241 fail:
1242 	IXL_RX_UNLOCK(rxr);
1243 	return (error);
1244 }
1245 
1246 
1247 /*********************************************************************
1248  *
1249  *  Free queue receive ring data structures
1250  *
1251  **********************************************************************/
1252 void
1253 ixl_free_que_rx(struct ixl_queue *que)
1254 {
1255 	struct rx_ring		*rxr = &que->rxr;
1256 	struct ixl_rx_buf	*buf;
1257 
1258 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1259 
1260 	/* Cleanup any existing buffers */
1261 	if (rxr->buffers != NULL) {
1262 		for (int i = 0; i < que->num_desc; i++) {
1263 			buf = &rxr->buffers[i];
1264 			if (buf->m_head != NULL) {
1265 				bus_dmamap_sync(rxr->htag, buf->hmap,
1266 				    BUS_DMASYNC_POSTREAD);
1267 				bus_dmamap_unload(rxr->htag, buf->hmap);
1268 				buf->m_head->m_flags |= M_PKTHDR;
1269 				m_freem(buf->m_head);
1270 			}
1271 			if (buf->m_pack != NULL) {
1272 				bus_dmamap_sync(rxr->ptag, buf->pmap,
1273 				    BUS_DMASYNC_POSTREAD);
1274 				bus_dmamap_unload(rxr->ptag, buf->pmap);
1275 				buf->m_pack->m_flags |= M_PKTHDR;
1276 				m_freem(buf->m_pack);
1277 			}
1278 			buf->m_head = NULL;
1279 			buf->m_pack = NULL;
1280 			if (buf->hmap != NULL) {
1281 				bus_dmamap_destroy(rxr->htag, buf->hmap);
1282 				buf->hmap = NULL;
1283 			}
1284 			if (buf->pmap != NULL) {
1285 				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1286 				buf->pmap = NULL;
1287 			}
1288 		}
1289 		if (rxr->buffers != NULL) {
1290 			free(rxr->buffers, M_DEVBUF);
1291 			rxr->buffers = NULL;
1292 		}
1293 	}
1294 
1295 	if (rxr->htag != NULL) {
1296 		bus_dma_tag_destroy(rxr->htag);
1297 		rxr->htag = NULL;
1298 	}
1299 	if (rxr->ptag != NULL) {
1300 		bus_dma_tag_destroy(rxr->ptag);
1301 		rxr->ptag = NULL;
1302 	}
1303 
1304 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1305 	return;
1306 }
1307 
1308 static __inline void
1309 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1310 {
1311 
1312 #if defined(INET6) || defined(INET)
1313         /*
1314          * At the moment, LRO is only for IPv4/TCP packets whose TCP
1315          * checksum has been computed by hardware and which carry no VLAN
1316          * tag in the ethernet header.
1317          */
1318         if (rxr->lro_enabled &&
1319             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1320             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1321             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1322                 /*
1323                  * Send to the stack if:
1324                  **  - LRO not enabled, or
1325                  **  - no LRO resources, or
1326                  **  - lro enqueue fails
1327                  */
1328                 if (rxr->lro.lro_cnt != 0)
1329                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1330                                 return;
1331         }
1332 #endif
1333 	IXL_RX_UNLOCK(rxr);
1334         (*ifp->if_input)(ifp, m);
1335 	IXL_RX_LOCK(rxr);
1336 }
1337 
1338 
1339 static __inline void
1340 ixl_rx_discard(struct rx_ring *rxr, int i)
1341 {
1342 	struct ixl_rx_buf	*rbuf;
1343 
1344 	rbuf = &rxr->buffers[i];
1345 
1346         if (rbuf->fmp != NULL) {/* Partial chain ? */
1347 		rbuf->fmp->m_flags |= M_PKTHDR;
1348                 m_freem(rbuf->fmp);
1349                 rbuf->fmp = NULL;
1350 	}
1351 
1352 	/*
1353 	** With advanced descriptors the writeback
1354 	** clobbers the buffer addrs, so it's easier
1355 	** to just free the existing mbufs and take
1356 	** the normal refresh path to get new buffers
1357 	** and mapping.
1358 	*/
1359 	if (rbuf->m_head) {
1360 		m_free(rbuf->m_head);
1361 		rbuf->m_head = NULL;
1362 	}
1363 
1364 	if (rbuf->m_pack) {
1365 		m_free(rbuf->m_pack);
1366 		rbuf->m_pack = NULL;
1367 	}
1368 
1369 	return;
1370 }
1371 
1372 #ifdef RSS
1373 /*
1374 ** ixl_ptype_to_hash: parse the packet type
1375 ** to determine the appropriate hash.
1376 */
1377 static inline int
1378 ixl_ptype_to_hash(u8 ptype)
1379 {
1380         struct i40e_rx_ptype_decoded	decoded;
1381 	u8				ex = 0;
1382 
1383 	decoded = decode_rx_desc_ptype(ptype);
1384 	ex = decoded.outer_frag;
1385 
1386 	if (!decoded.known)
1387 		return M_HASHTYPE_OPAQUE;
1388 
1389 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
1390 		return M_HASHTYPE_OPAQUE;
1391 
1392 	/* Note: anything that gets to this point is IP */
1393         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
1394 		switch (decoded.inner_prot) {
1395 			case I40E_RX_PTYPE_INNER_PROT_TCP:
1396 				if (ex)
1397 					return M_HASHTYPE_RSS_TCP_IPV6_EX;
1398 				else
1399 					return M_HASHTYPE_RSS_TCP_IPV6;
1400 			case I40E_RX_PTYPE_INNER_PROT_UDP:
1401 				if (ex)
1402 					return M_HASHTYPE_RSS_UDP_IPV6_EX;
1403 				else
1404 					return M_HASHTYPE_RSS_UDP_IPV6;
1405 			default:
1406 				if (ex)
1407 					return M_HASHTYPE_RSS_IPV6_EX;
1408 				else
1409 					return M_HASHTYPE_RSS_IPV6;
1410 		}
1411 	}
1412         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
1413 		switch (decoded.inner_prot) {
1414 			case I40E_RX_PTYPE_INNER_PROT_TCP:
1415 					return M_HASHTYPE_RSS_TCP_IPV4;
1416 			case I40E_RX_PTYPE_INNER_PROT_UDP:
1417 				if (ex)
1418 					return M_HASHTYPE_RSS_UDP_IPV4_EX;
1419 				else
1420 					return M_HASHTYPE_RSS_UDP_IPV4;
1421 			default:
1422 					return M_HASHTYPE_RSS_IPV4;
1423 		}
1424 	}
1425 	/* We should never get here!! */
1426 	return M_HASHTYPE_OPAQUE;
1427 }
1428 #endif /* RSS */
1429 
1430 /*********************************************************************
1431  *
1432  *  This routine executes in interrupt context. It replenishes
1433  *  the mbufs in the descriptor ring and sends data which has been
1434  *  DMA'd into host memory to the upper layer.
1435  *
1436  *  We loop at most count times if count is > 0, or until done if
1437  *  count < 0.
1438  *
1439  *  Return TRUE for more work, FALSE for all clean.
1440  *********************************************************************/
1441 bool
1442 ixl_rxeof(struct ixl_queue *que, int count)
1443 {
1444 	struct ixl_vsi		*vsi = que->vsi;
1445 	struct rx_ring		*rxr = &que->rxr;
1446 	struct ifnet		*ifp = vsi->ifp;
1447 #if defined(INET6) || defined(INET)
1448 	struct lro_ctrl		*lro = &rxr->lro;
1449 	struct lro_entry	*queued;
1450 #endif
1451 	int			i, nextp, processed = 0;
1452 	union i40e_rx_desc	*cur;
1453 	struct ixl_rx_buf	*rbuf, *nbuf;
1454 
1455 
1456 	IXL_RX_LOCK(rxr);
1457 
1458 
1459 	for (i = rxr->next_check; count != 0;) {
1460 		struct mbuf	*sendmp, *mh, *mp;
1461 		u32		rsc, status, error;
1462 		u16		hlen, plen, vtag;
1463 		u64		qword;
1464 		u8		ptype;
1465 		bool		eop;
1466 
1467 		/* Sync the ring. */
1468 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1469 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1470 
1471 		cur = &rxr->base[i];
1472 		qword = le64toh(cur->wb.qword1.status_error_len);
1473 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1474 		    >> I40E_RXD_QW1_STATUS_SHIFT;
1475 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1476 		    >> I40E_RXD_QW1_ERROR_SHIFT;
1477 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1478 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1479 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1480 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1481 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1482 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1483 
1484 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1485 			++rxr->not_done;
1486 			break;
1487 		}
1488 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1489 			break;
1490 
1491 		count--;
1492 		sendmp = NULL;
1493 		nbuf = NULL;
1494 		rsc = 0;
1495 		cur->wb.qword1.status_error_len = 0;
1496 		rbuf = &rxr->buffers[i];
1497 		mh = rbuf->m_head;
1498 		mp = rbuf->m_pack;
1499 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1500 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1501 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1502 		else
1503 			vtag = 0;
1504 
1505 		/*
1506 		** Make sure bad packets are discarded;
1507 		** note that only the EOP descriptor has valid
1508 		** error results.
1509 		*/
1510                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1511 			rxr->discarded++;
1512 			ixl_rx_discard(rxr, i);
1513 			goto next_desc;
1514 		}
1515 
1516 		/* Prefetch the next buffer */
1517 		if (!eop) {
1518 			nextp = i + 1;
1519 			if (nextp == que->num_desc)
1520 				nextp = 0;
1521 			nbuf = &rxr->buffers[nextp];
1522 			prefetch(nbuf);
1523 		}
1524 
1525 		/*
1526 		** The header mbuf is ONLY used when header
1527 		** split is enabled, otherwise we get normal
1528 		** behavior, ie, both header and payload
1529 		** are DMA'd into the payload buffer.
1530 		**
1531 		** Rather than using the fmp/lmp global pointers
1532 		** we now keep the head of a packet chain in the
1533 		** buffer struct and pass this along from one
1534 		** descriptor to the next, until we get EOP.
1535 		*/
1536 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1537 			if (hlen > IXL_RX_HDR)
1538 				hlen = IXL_RX_HDR;
1539 			mh->m_len = hlen;
1540 			mh->m_flags |= M_PKTHDR;
1541 			mh->m_next = NULL;
1542 			mh->m_pkthdr.len = mh->m_len;
1543 			/* Null buf pointer so it is refreshed */
1544 			rbuf->m_head = NULL;
1545 			/*
1546 			** Check the payload length; this
1547 			** could be zero if it's a small
1548 			** packet.
1549 			*/
1550 			if (plen > 0) {
1551 				mp->m_len = plen;
1552 				mp->m_next = NULL;
1553 				mp->m_flags &= ~M_PKTHDR;
1554 				mh->m_next = mp;
1555 				mh->m_pkthdr.len += mp->m_len;
1556 				/* Null buf pointer so it is refreshed */
1557 				rbuf->m_pack = NULL;
1558 				rxr->split++;
1559 			}
1560 			/*
1561 			** Now create the forward
1562 			** chain so when complete
1563 			** we won't have to.
1564 			*/
1565                         if (eop == 0) {
1566 				/* stash the chain head */
1567                                 nbuf->fmp = mh;
1568 				/* Make forward chain */
1569                                 if (plen)
1570                                         mp->m_next = nbuf->m_pack;
1571                                 else
1572                                         mh->m_next = nbuf->m_pack;
1573                         } else {
1574 				/* Singlet, prepare to send */
1575                                 sendmp = mh;
1576                                 if (vtag) {
1577                                         sendmp->m_pkthdr.ether_vtag = vtag;
1578                                         sendmp->m_flags |= M_VLANTAG;
1579                                 }
1580                         }
1581 		} else {
1582 			/*
1583 			** Either no header split, or a
1584 			** secondary piece of a fragmented
1585 			** split packet.
1586 			*/
1587 			mp->m_len = plen;
1588 			/*
1589 			** See if there is a stored chain head
1590 			** that determines what we are handling
1591 			*/
1592 			sendmp = rbuf->fmp;
1593 			rbuf->m_pack = rbuf->fmp = NULL;
1594 
1595 			if (sendmp != NULL) /* secondary frag */
1596 				sendmp->m_pkthdr.len += mp->m_len;
1597 			else {
1598 				/* first desc of a non-ps chain */
1599 				sendmp = mp;
1600 				sendmp->m_flags |= M_PKTHDR;
1601 				sendmp->m_pkthdr.len = mp->m_len;
1602 				if (vtag) {
1603 					sendmp->m_pkthdr.ether_vtag = vtag;
1604 					sendmp->m_flags |= M_VLANTAG;
1605 				}
1606                         }
1607 			/* Pass the head pointer on */
1608 			if (eop == 0) {
1609 				nbuf->fmp = sendmp;
1610 				sendmp = NULL;
1611 				mp->m_next = nbuf->m_pack;
1612 			}
1613 		}
1614 		++processed;
1615 		/* Sending this frame? */
1616 		if (eop) {
1617 			sendmp->m_pkthdr.rcvif = ifp;
1618 			/* gather stats */
1619 			rxr->rx_packets++;
1620 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1621 			/* capture data for dynamic ITR adjustment */
1622 			rxr->packets++;
1623 			rxr->bytes += sendmp->m_pkthdr.len;
1624 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1625 				ixl_rx_checksum(sendmp, status, error, ptype);
1626 #ifdef RSS
1627 			sendmp->m_pkthdr.flowid =
1628 			    le32toh(cur->wb.qword0.hi_dword.rss);
1629 			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1630 #else
1631 			sendmp->m_pkthdr.flowid = que->msix;
1632 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1633 #endif
1634 		}
1635 next_desc:
1636 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1637 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1638 
1639 		/* Advance our pointers to the next descriptor. */
1640 		if (++i == que->num_desc)
1641 			i = 0;
1642 
1643 		/* Now send to the stack or do LRO */
1644 		if (sendmp != NULL) {
1645 			rxr->next_check = i;
1646 			ixl_rx_input(rxr, ifp, sendmp, ptype);
1647 			i = rxr->next_check;
1648 		}
1649 
1650                /* Every 8 descriptors we go to refresh mbufs */
1651 		if (processed == 8) {
1652 			ixl_refresh_mbufs(que, i);
1653 			processed = 0;
1654 		}
1655 	}
1656 
1657 	/* Refresh any remaining buf structs */
1658 	if (ixl_rx_unrefreshed(que))
1659 		ixl_refresh_mbufs(que, i);
1660 
1661 	rxr->next_check = i;
1662 
1663 #if defined(INET6) || defined(INET)
1664 	/*
1665 	 * Flush any outstanding LRO work
1666 	 */
1667 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1668 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1669 		tcp_lro_flush(lro, queued);
1670 	}
1671 #endif
1672 
1673 	IXL_RX_UNLOCK(rxr);
1674 	return (FALSE);
1675 }
1676 
1677 
1678 /*********************************************************************
1679  *
1680  *  Verify that the hardware indicated that the checksum is valid.
1681  *  Inform the stack about the status of the checksum so that the stack
1682  *  doesn't spend time verifying the checksum.
1683  *
1684  *********************************************************************/
1685 static void
1686 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1687 {
1688 	struct i40e_rx_ptype_decoded decoded;
1689 
1690 	decoded = decode_rx_desc_ptype(ptype);
1691 
1692 	/* Errors? */
1693  	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1694 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1695 		mp->m_pkthdr.csum_flags = 0;
1696 		return;
1697 	}
1698 
1699 	/* IPv6 with extension headers likely have bad csum */
1700 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1701 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1702 		if (status &
1703 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1704 			mp->m_pkthdr.csum_flags = 0;
1705 			return;
1706 		}
1707 
1708 
1709 	/* IP Checksum Good */
1710 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1711 	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1712 
1713 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1714 		mp->m_pkthdr.csum_flags |=
1715 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1716 		mp->m_pkthdr.csum_data |= htons(0xffff);
1717 	}
1718 	return;
1719 }
1720 
1721 #if __FreeBSD_version >= 1100000
1722 uint64_t
1723 ixl_get_counter(if_t ifp, ift_counter cnt)
1724 {
1725 	struct ixl_vsi *vsi;
1726 
1727 	vsi = if_getsoftc(ifp);
1728 
1729 	switch (cnt) {
1730 	case IFCOUNTER_IPACKETS:
1731 		return (vsi->ipackets);
1732 	case IFCOUNTER_IERRORS:
1733 		return (vsi->ierrors);
1734 	case IFCOUNTER_OPACKETS:
1735 		return (vsi->opackets);
1736 	case IFCOUNTER_OERRORS:
1737 		return (vsi->oerrors);
1738 	case IFCOUNTER_COLLISIONS:
1739 		/* Collisions are by standard impossible in 40G/10G Ethernet */
1740 		return (0);
1741 	case IFCOUNTER_IBYTES:
1742 		return (vsi->ibytes);
1743 	case IFCOUNTER_OBYTES:
1744 		return (vsi->obytes);
1745 	case IFCOUNTER_IMCASTS:
1746 		return (vsi->imcasts);
1747 	case IFCOUNTER_OMCASTS:
1748 		return (vsi->omcasts);
1749 	case IFCOUNTER_IQDROPS:
1750 		return (vsi->iqdrops);
1751 	case IFCOUNTER_OQDROPS:
1752 		return (vsi->oqdrops);
1753 	case IFCOUNTER_NOPROTO:
1754 		return (vsi->noproto);
1755 	default:
1756 		return (if_get_counter_default(ifp, cnt));
1757 	}
1758 }
1759 #endif
1760 
1761