xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision 6186fd1857626de0f7cb1a9e4dff19082f9ebb11)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 **	    This was separated to allow usage by
38 **	    both the BASE and the VF drivers.
39 */
40 
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "ixl.h"
44 
45 /* Local Prototypes */
46 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
48 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49 static int	ixl_tx_setup_offload(struct ixl_queue *,
50 		    struct mbuf *, u32 *, u32 *);
51 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52 
53 static __inline void ixl_rx_discard(struct rx_ring *, int);
54 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55 		    struct mbuf *, u8);
56 
57 /*
58 ** Multiqueue Transmit driver
59 **
60 */
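/*
** Queue selection sketch (illustrative): with 4 queues, a packet
** carrying an M_FLOWID hash of 0x12345 is steered to queue
** 0x12345 % 4 = 1, while packets without a flowid fall back to
** curcpu % 4, so a given CPU tends to keep hitting the same ring
** and ring lock.
*/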
61 int
62 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63 {
64 	struct ixl_vsi		*vsi = ifp->if_softc;
65 	struct ixl_queue	*que;
66 	struct tx_ring		*txr;
67 	int 			err, i;
68 
69 	/* Which queue to use */
70 	if ((m->m_flags & M_FLOWID) != 0)
71 		i = m->m_pkthdr.flowid % vsi->num_queues;
72 	else
73 		i = curcpu % vsi->num_queues;
74 
75 	/* Check for a hung queue and pick alternative */
76 	if (((1 << i) & vsi->active_queues) == 0)
77 		i = ffsl(vsi->active_queues);
78 
79 	que = &vsi->queues[i];
80 	txr = &que->txr;
81 
82 	err = drbr_enqueue(ifp, txr->br, m);
83 	if (err)
84 		return(err);
85 	if (IXL_TX_TRYLOCK(txr)) {
86 		ixl_mq_start_locked(ifp, txr);
87 		IXL_TX_UNLOCK(txr);
88 	} else
89 		taskqueue_enqueue(que->tq, &que->tx_task);
90 
91 	return (0);
92 }
93 
94 int
95 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96 {
97 	struct ixl_queue	*que = txr->que;
98 	struct ixl_vsi		*vsi = que->vsi;
99         struct mbuf		*next;
100         int			err = 0;
101 
102 
103 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104 	    vsi->link_active == 0)
105 		return (ENETDOWN);
106 
107 	/* Process the transmit queue */
108 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109 		if ((err = ixl_xmit(que, &next)) != 0) {
110 			if (next == NULL)
111 				drbr_advance(ifp, txr->br);
112 			else
113 				drbr_putback(ifp, txr->br, next);
114 			break;
115 		}
116 		drbr_advance(ifp, txr->br);
117 		/* Send a copy of the frame to the BPF listener */
118 		ETHER_BPF_MTAP(ifp, next);
119 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120 			break;
121 	}
122 
123 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124 		ixl_txeof(que);
125 
126 	return (err);
127 }
128 
129 /*
130  * Called from a taskqueue to drain queued transmit packets.
131  */
132 void
133 ixl_deferred_mq_start(void *arg, int pending)
134 {
135 	struct ixl_queue	*que = arg;
136         struct tx_ring		*txr = &que->txr;
137 	struct ixl_vsi		*vsi = que->vsi;
138         struct ifnet		*ifp = vsi->ifp;
139 
140 	IXL_TX_LOCK(txr);
141 	if (!drbr_empty(ifp, txr->br))
142 		ixl_mq_start_locked(ifp, txr);
143 	IXL_TX_UNLOCK(txr);
144 }
145 
146 /*
147 ** Flush all queue ring buffers
148 */
149 void
150 ixl_qflush(struct ifnet *ifp)
151 {
152 	struct ixl_vsi	*vsi = ifp->if_softc;
153 
154         for (int i = 0; i < vsi->num_queues; i++) {
155 		struct ixl_queue *que = &vsi->queues[i];
156 		struct tx_ring	*txr = &que->txr;
157 		struct mbuf	*m;
158 		IXL_TX_LOCK(txr);
159 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160 			m_freem(m);
161 		IXL_TX_UNLOCK(txr);
162 	}
163 	if_qflush(ifp);
164 }
165 
166 /*
167 ** Find mbuf chains passed to the driver
168 ** that are 'sparse', using more than 8
169 ** mbufs to deliver an mss-size chunk of data
170 */
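/*
** Illustrative example: with an MSS of 1448 bytes, a chain whose
** first MSS worth of payload is spread across more than
** IXL_SPARSE_CHAIN mbufs trips this check, and the caller
** (ixl_xmit) then m_defrag()s the chain before mapping it for DMA.
*/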
171 static inline bool
172 ixl_tso_detect_sparse(struct mbuf *mp)
173 {
174 	struct mbuf	*m;
175 	int		num = 0, mss;
176 	bool		ret = FALSE;
177 
178 	mss = mp->m_pkthdr.tso_segsz;
179 	for (m = mp->m_next; m != NULL; m = m->m_next) {
180 		num++;
181 		mss -= m->m_len;
182 		if (mss < 1)
183 			break;
184 		if (m->m_next == NULL)
185 			break;
186 	}
187 	if (num > IXL_SPARSE_CHAIN)
188 		ret = TRUE;
189 
190 	return (ret);
191 }
192 
193 
194 /*********************************************************************
195  *
196  *  This routine maps the mbufs to tx descriptors, allowing the
197  *  TX engine to transmit the packets.
198  *  	- return 0 on success, positive on failure
199  *
200  **********************************************************************/
201 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
202 
203 static int
204 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205 {
206 	struct ixl_vsi		*vsi = que->vsi;
207 	struct i40e_hw		*hw = vsi->hw;
208 	struct tx_ring		*txr = &que->txr;
209 	struct ixl_tx_buf	*buf;
210 	struct i40e_tx_desc	*txd = NULL;
211 	struct mbuf		*m_head, *m;
212 	int             	i, j, error, nsegs, maxsegs;
213 	int			first, last = 0;
214 	u16			vtag = 0;
215 	u32			cmd, off;
216 	bus_dmamap_t		map;
217 	bus_dma_tag_t		tag;
218 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
219 
220 
221 	cmd = off = 0;
222 	m_head = *m_headp;
223 
224         /*
225          * Important to capture the first descriptor
226          * used because it will contain the index of
227          * the one we tell the hardware to report back
228          */
229         first = txr->next_avail;
230 	buf = &txr->buffers[first];
231 	map = buf->map;
232 	tag = txr->tx_tag;
233 	maxsegs = IXL_MAX_TX_SEGS;
234 
235 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236 		/* Use larger mapping for TSO */
237 		tag = txr->tso_tag;
238 		maxsegs = IXL_MAX_TSO_SEGS;
239 		if (ixl_tso_detect_sparse(m_head)) {
240 			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				/* Defrag failed; drop the frame */
				que->mbuf_defrag_failed++;
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
241 			*m_headp = m;
242 		}
243 	}
244 
245 	/*
246 	 * Map the packet for DMA.
247 	 */
248 	error = bus_dmamap_load_mbuf_sg(tag, map,
249 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
250 
251 	if (error == EFBIG) {
252 		struct mbuf *m;
253 
254 		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
255 		if (m == NULL) {
256 			que->mbuf_defrag_failed++;
257 			m_freem(*m_headp);
258 			*m_headp = NULL;
259 			return (ENOBUFS);
260 		}
261 		*m_headp = m;
262 
263 		/* Try it again */
264 		error = bus_dmamap_load_mbuf_sg(tag, map,
265 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
266 
267 		if (error == ENOMEM) {
268 			que->tx_dma_setup++;
269 			return (error);
270 		} else if (error != 0) {
271 			que->tx_dma_setup++;
272 			m_freem(*m_headp);
273 			*m_headp = NULL;
274 			return (error);
275 		}
276 	} else if (error == ENOMEM) {
277 		que->tx_dma_setup++;
278 		return (error);
279 	} else if (error != 0) {
280 		que->tx_dma_setup++;
281 		m_freem(*m_headp);
282 		*m_headp = NULL;
283 		return (error);
284 	}
285 
286 	/* Make certain there are enough descriptors */
287 	if (nsegs > txr->avail - 2) {
288 		txr->no_desc++;
289 		error = ENOBUFS;
290 		goto xmit_fail;
291 	}
292 	m_head = *m_headp;
293 
294 	/* Set up the TSO/CSUM offload */
295 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
296 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
297 		if (error)
298 			goto xmit_fail;
299 	}
300 
301 	cmd |= I40E_TX_DESC_CMD_ICRC;
302 	/* Grab the VLAN tag */
303 	if (m_head->m_flags & M_VLANTAG) {
304 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
305 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
306 	}
307 
308 	i = txr->next_avail;
309 	for (j = 0; j < nsegs; j++) {
310 		bus_size_t seglen;
311 
312 		buf = &txr->buffers[i];
313 		buf->tag = tag; /* Keep track of the type tag */
314 		txd = &txr->base[i];
315 		seglen = segs[j].ds_len;
316 
317 		txd->buffer_addr = htole64(segs[j].ds_addr);
318 		txd->cmd_type_offset_bsz =
319 		    htole64(I40E_TX_DESC_DTYPE_DATA
320 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
321 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
322 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
323 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
324 
325 		last = i; /* descriptor that will get completion IRQ */
326 
327 		if (++i == que->num_desc)
328 			i = 0;
329 
330 		buf->m_head = NULL;
331 		buf->eop_index = -1;
332 	}
333 	/* Set the last descriptor for report */
334 	txd->cmd_type_offset_bsz |=
335 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
336 	txr->avail -= nsegs;
337 	txr->next_avail = i;
338 
339 	buf->m_head = m_head;
340 	/* Swap the dma map between the first and last descriptor */
341 	txr->buffers[first].map = buf->map;
342 	buf->map = map;
343 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
344 
345         /* Set the index of the descriptor that will be marked done */
346         buf = &txr->buffers[first];
347 	buf->eop_index = last;
348 
349         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
350             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
351 	/*
352 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
353 	 * hardware that this frame is available to transmit.
354 	 */
355 	++txr->total_packets;
356 	wr32(hw, txr->tail, i);
357 
358 	ixl_flush(hw);
359 	/* Mark outstanding work */
360 	if (que->busy == 0)
361 		que->busy = 1;
362 	return (0);
363 
364 xmit_fail:
365 	bus_dmamap_unload(tag, buf->map);
366 	return (error);
367 }
368 
369 
370 /*********************************************************************
371  *
372  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
373  *  the information needed to transmit a packet on the wire. This is
374  *  called only once at attach, setup is done every reset.
375  *
376  **********************************************************************/
377 int
378 ixl_allocate_tx_data(struct ixl_queue *que)
379 {
380 	struct tx_ring		*txr = &que->txr;
381 	struct ixl_vsi		*vsi = que->vsi;
382 	device_t		dev = vsi->dev;
383 	struct ixl_tx_buf	*buf;
384 	int			error = 0;
385 
386 	/*
387 	 * Setup DMA descriptor areas.
388 	 */
389 	if ((error = bus_dma_tag_create(NULL,		/* parent */
390 			       1, 0,			/* alignment, bounds */
391 			       BUS_SPACE_MAXADDR,	/* lowaddr */
392 			       BUS_SPACE_MAXADDR,	/* highaddr */
393 			       NULL, NULL,		/* filter, filterarg */
394 			       IXL_TSO_SIZE,		/* maxsize */
395 			       IXL_MAX_TX_SEGS,		/* nsegments */
396 			       PAGE_SIZE,		/* maxsegsize */
397 			       0,			/* flags */
398 			       NULL,			/* lockfunc */
399 			       NULL,			/* lockfuncarg */
400 			       &txr->tx_tag))) {
401 		device_printf(dev,"Unable to allocate TX DMA tag\n");
402 		goto fail;
403 	}
404 
405 	/* Make a special tag for TSO */
406 	if ((error = bus_dma_tag_create(NULL,		/* parent */
407 			       1, 0,			/* alignment, bounds */
408 			       BUS_SPACE_MAXADDR,	/* lowaddr */
409 			       BUS_SPACE_MAXADDR,	/* highaddr */
410 			       NULL, NULL,		/* filter, filterarg */
411 			       IXL_TSO_SIZE,		/* maxsize */
412 			       IXL_MAX_TSO_SEGS,	/* nsegments */
413 			       PAGE_SIZE,		/* maxsegsize */
414 			       0,			/* flags */
415 			       NULL,			/* lockfunc */
416 			       NULL,			/* lockfuncarg */
417 			       &txr->tso_tag))) {
418 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
419 		goto fail;
420 	}
421 
422 	if (!(txr->buffers =
423 	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
424 	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
425 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
426 		error = ENOMEM;
427 		goto fail;
428 	}
429 
430         /* Create the descriptor buffer default dma maps */
431 	buf = txr->buffers;
432 	for (int i = 0; i < que->num_desc; i++, buf++) {
433 		buf->tag = txr->tx_tag;
434 		error = bus_dmamap_create(buf->tag, 0, &buf->map);
435 		if (error != 0) {
436 			device_printf(dev, "Unable to create TX DMA map\n");
437 			goto fail;
438 		}
439 	}
440 fail:
441 	return (error);
442 }
443 
444 
445 /*********************************************************************
446  *
447  *  (Re)Initialize a queue transmit ring.
448  *	- called by init, it clears the descriptor ring,
449  *	  and frees any stale mbufs
450  *
451  **********************************************************************/
452 void
453 ixl_init_tx_ring(struct ixl_queue *que)
454 {
455 	struct tx_ring *txr = &que->txr;
456 	struct ixl_tx_buf *buf;
457 
458 	/* Clear the old ring contents */
459 	IXL_TX_LOCK(txr);
460 	bzero((void *)txr->base,
461 	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
462 
463 	/* Reset indices */
464 	txr->next_avail = 0;
465 	txr->next_to_clean = 0;
466 
467 #ifdef IXL_FDIR
468 	/* Initialize flow director */
469 	txr->atr_rate = ixl_atr_rate;
470 	txr->atr_count = 0;
471 #endif
472 
473 	/* Free any existing tx mbufs. */
474         buf = txr->buffers;
475 	for (int i = 0; i < que->num_desc; i++, buf++) {
476 		if (buf->m_head != NULL) {
477 			bus_dmamap_sync(buf->tag, buf->map,
478 			    BUS_DMASYNC_POSTWRITE);
479 			bus_dmamap_unload(buf->tag, buf->map);
480 			m_freem(buf->m_head);
481 			buf->m_head = NULL;
482 		}
483 		/* Clear the EOP index */
484 		buf->eop_index = -1;
485         }
486 
487 	/* Set number of descriptors available */
488 	txr->avail = que->num_desc;
489 
490 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
491 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
492 	IXL_TX_UNLOCK(txr);
493 }
494 
495 
496 /*********************************************************************
497  *
498  *  Free transmit ring related data structures.
499  *
500  **********************************************************************/
501 void
502 ixl_free_que_tx(struct ixl_queue *que)
503 {
504 	struct tx_ring *txr = &que->txr;
505 	struct ixl_tx_buf *buf;
506 
507 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
508 
509 	for (int i = 0; i < que->num_desc; i++) {
510 		buf = &txr->buffers[i];
511 		if (buf->m_head != NULL) {
512 			bus_dmamap_sync(buf->tag, buf->map,
513 			    BUS_DMASYNC_POSTWRITE);
514 			bus_dmamap_unload(buf->tag,
515 			    buf->map);
516 			m_freem(buf->m_head);
517 			buf->m_head = NULL;
518 			if (buf->map != NULL) {
519 				bus_dmamap_destroy(buf->tag,
520 				    buf->map);
521 				buf->map = NULL;
522 			}
523 		} else if (buf->map != NULL) {
524 			bus_dmamap_unload(buf->tag,
525 			    buf->map);
526 			bus_dmamap_destroy(buf->tag,
527 			    buf->map);
528 			buf->map = NULL;
529 		}
530 	}
531 	if (txr->br != NULL)
532 		buf_ring_free(txr->br, M_DEVBUF);
533 	if (txr->buffers != NULL) {
534 		free(txr->buffers, M_DEVBUF);
535 		txr->buffers = NULL;
536 	}
537 	if (txr->tx_tag != NULL) {
538 		bus_dma_tag_destroy(txr->tx_tag);
539 		txr->tx_tag = NULL;
540 	}
541 	if (txr->tso_tag != NULL) {
542 		bus_dma_tag_destroy(txr->tso_tag);
543 		txr->tso_tag = NULL;
544 	}
545 
546 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
547 	return;
548 }
549 
550 /*********************************************************************
551  *
552  *  Setup descriptor for hw offloads
553  *
554  **********************************************************************/
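/*
** Note on field units (illustrative, as used by the shifts below):
** the hardware takes MACLEN in 2-byte words and IPLEN/L4LEN in
** 4-byte words, hence elen is shifted right by 1 and the IP/L4
** header lengths right by 2 before being packed into *off.
*/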
555 
556 static int
557 ixl_tx_setup_offload(struct ixl_queue *que,
558     struct mbuf *mp, u32 *cmd, u32 *off)
559 {
560 	struct ether_vlan_header	*eh;
561 #ifdef INET
562 	struct ip			*ip = NULL;
563 #endif
564 	struct tcphdr			*th = NULL;
565 #ifdef INET6
566 	struct ip6_hdr			*ip6;
567 #endif
568 	int				elen, ip_hlen = 0, tcp_hlen;
569 	u16				etype;
570 	u8				ipproto = 0;
571 	bool				tso = FALSE;
572 
573 
574 	/* Set up the TSO context descriptor if required */
575 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
576 		tso = ixl_tso_setup(que, mp);
577 		if (tso)
578 			++que->tso;
579 		else
580 			return (ENXIO);
581 	}
582 
583 	/*
584 	 * Determine where frame payload starts.
585 	 * Jump over vlan headers if already present,
586 	 * helpful for QinQ too.
587 	 */
588 	eh = mtod(mp, struct ether_vlan_header *);
589 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
590 		etype = ntohs(eh->evl_proto);
591 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
592 	} else {
593 		etype = ntohs(eh->evl_encap_proto);
594 		elen = ETHER_HDR_LEN;
595 	}
596 
597 	switch (etype) {
598 #ifdef INET
599 		case ETHERTYPE_IP:
600 			ip = (struct ip *)(mp->m_data + elen);
601 			ip_hlen = ip->ip_hl << 2;
602 			ipproto = ip->ip_p;
603 			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
604 			/* The IP checksum must be recalculated with TSO */
605 			if (tso)
606 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
607 			else
608 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
609 			break;
610 #endif
611 #ifdef INET6
612 		case ETHERTYPE_IPV6:
613 			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
614 			ip_hlen = sizeof(struct ip6_hdr);
615 			ipproto = ip6->ip6_nxt;
616 			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
617 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
618 			break;
619 #endif
620 		default:
621 			break;
622 	}
623 
624 	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
625 	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
626 
627 	switch (ipproto) {
628 		case IPPROTO_TCP:
629 			tcp_hlen = th->th_off << 2;
630 			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
631 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
632 				*off |= (tcp_hlen >> 2) <<
633 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
634 			}
635 #ifdef IXL_FDIR
636 			ixl_atr(que, th, etype);
637 #endif
638 			break;
639 		case IPPROTO_UDP:
640 			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
641 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
642 				*off |= (sizeof(struct udphdr) >> 2) <<
643 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
644 			}
645 			break;
646 
647 		case IPPROTO_SCTP:
648 			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
649 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
650 				*off |= (sizeof(struct sctphdr) >> 2) <<
651 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
652 			}
653 			/* Fall Thru */
654 		default:
655 			break;
656 	}
657 
658         return (0);
659 }
660 
661 
662 /**********************************************************************
663  *
664  *  Setup context for hardware segmentation offload (TSO)
665  *
666  **********************************************************************/
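/*
** Illustrative example: for a TSO send with a 14-byte Ethernet,
** 20-byte IPv4 and 20-byte TCP header, tsolen below works out to
** m_pkthdr.len - 54, i.e. the TCP payload alone; the context
** descriptor built here consumes one ring slot of its own, ahead
** of the data descriptors queued by ixl_xmit().
*/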
667 static bool
668 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
669 {
670 	struct tx_ring			*txr = &que->txr;
671 	struct i40e_tx_context_desc	*TXD;
672 	struct ixl_tx_buf		*buf;
673 	u32				cmd, mss, type, tsolen;
674 	u16				etype;
675 	int				idx, elen, ip_hlen, tcp_hlen;
676 	struct ether_vlan_header	*eh;
677 #ifdef INET
678 	struct ip			*ip;
679 #endif
680 #ifdef INET6
681 	struct ip6_hdr			*ip6;
682 #endif
683 #if defined(INET6) || defined(INET)
684 	struct tcphdr			*th;
685 #endif
686 	u64				type_cmd_tso_mss;
687 
688 	/*
689 	 * Determine where frame payload starts.
690 	 * Jump over vlan headers if already present
691 	 */
692 	eh = mtod(mp, struct ether_vlan_header *);
693 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
694 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
695 		etype = eh->evl_proto;
696 	} else {
697 		elen = ETHER_HDR_LEN;
698 		etype = eh->evl_encap_proto;
699 	}
700 
701         switch (ntohs(etype)) {
702 #ifdef INET6
703 	case ETHERTYPE_IPV6:
704 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
705 		if (ip6->ip6_nxt != IPPROTO_TCP)
706 			return FALSE;
707 		ip_hlen = sizeof(struct ip6_hdr);
708 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
709 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
710 		tcp_hlen = th->th_off << 2;
711 		break;
712 #endif
713 #ifdef INET
714 	case ETHERTYPE_IP:
715 		ip = (struct ip *)(mp->m_data + elen);
716 		if (ip->ip_p != IPPROTO_TCP)
717 			return FALSE;
718 		ip->ip_sum = 0;
719 		ip_hlen = ip->ip_hl << 2;
720 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
721 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
722 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
723 		tcp_hlen = th->th_off << 2;
724 		break;
725 #endif
726 	default:
727 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
728 		    __func__, ntohs(etype));
729 		return FALSE;
730         }
731 
732         /* Ensure we have at least the IP+TCP header in the first mbuf. */
733         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
734 		return FALSE;
735 
736 	idx = txr->next_avail;
737 	buf = &txr->buffers[idx];
738 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
739 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
740 
741 	type = I40E_TX_DESC_DTYPE_CONTEXT;
742 	cmd = I40E_TX_CTX_DESC_TSO;
743 	mss = mp->m_pkthdr.tso_segsz;
744 
745 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
746 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
747 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
748 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
749 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
750 
751 	TXD->tunneling_params = htole32(0);
752 	buf->m_head = NULL;
753 	buf->eop_index = -1;
754 
755 	if (++idx == que->num_desc)
756 		idx = 0;
757 
758 	txr->avail--;
759 	txr->next_avail = idx;
760 
761 	return TRUE;
762 }
763 
764 /*
765 ** ixl_get_tx_head - Retrieve the value from the
766 **    location the HW records its HEAD index
767 */
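/*
** Head writeback sketch (illustrative): the TX ring allocation
** leaves room past the last descriptor, and the 32-bit value at
** &txr->base[num_desc] is where the hardware reports the index of
** the next descriptor it will process; ixl_txeof() cleans up to
** that point instead of polling a done bit in each descriptor.
*/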
768 static inline u32
769 ixl_get_tx_head(struct ixl_queue *que)
770 {
771 	struct tx_ring  *txr = &que->txr;
772 	void *head = &txr->base[que->num_desc];
773 	return LE32_TO_CPU(*(volatile __le32 *)head);
774 }
775 
776 /**********************************************************************
777  *
778  *  Examine each tx_buffer in the used queue. If the hardware is done
779  *  processing the packet then free associated resources. The
780  *  tx_buffer is put back on the free queue.
781  *
782  **********************************************************************/
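/*
** Cleaning sketch (illustrative): each transmitted packet records
** the index of its last (EOP) descriptor in the first tx_buffer it
** used, so the loop below frees whole packets at a time, walking
** from next_to_clean toward the hardware-reported head.
*/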
783 bool
784 ixl_txeof(struct ixl_queue *que)
785 {
786 	struct tx_ring		*txr = &que->txr;
787 	u32			first, last, head, done, processed;
788 	struct ixl_tx_buf	*buf;
789 	struct i40e_tx_desc	*tx_desc, *eop_desc;
790 
791 
792 	mtx_assert(&txr->mtx, MA_OWNED);
793 
794 	/* These are not the descriptors you seek, move along :) */
795 	if (txr->avail == que->num_desc) {
796 		que->busy = 0;
797 		return FALSE;
798 	}
799 
800 	processed = 0;
801 	first = txr->next_to_clean;
802 	buf = &txr->buffers[first];
803 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
804 	last = buf->eop_index;
805 	if (last == -1)
806 		return FALSE;
807 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
808 
809 	/* Get the Head WB value */
810 	head = ixl_get_tx_head(que);
811 
812 	/*
813 	** Get the index of the first descriptor
814 	** BEYOND the EOP and call that 'done'.
815 	** I do this so the comparison in the
816 	** inner while loop below can be simple
817 	*/
818 	if (++last == que->num_desc) last = 0;
819 	done = last;
820 
821         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
822             BUS_DMASYNC_POSTREAD);
823 	/*
824 	** The HEAD index of the ring is written to a
825 	** defined location; this, rather than a done bit,
826 	** is what is used to keep track of what must be
827 	** 'cleaned'.
828 	*/
829 	while (first != head) {
830 		/* We clean the range of the packet */
831 		while (first != done) {
832 			++txr->avail;
833 			++processed;
834 
835 			if (buf->m_head) {
836 				txr->bytes += /* for ITR adjustment */
837 				    buf->m_head->m_pkthdr.len;
838 				txr->tx_bytes += /* for TX stats */
839 				    buf->m_head->m_pkthdr.len;
840 				bus_dmamap_sync(buf->tag,
841 				    buf->map,
842 				    BUS_DMASYNC_POSTWRITE);
843 				bus_dmamap_unload(buf->tag,
844 				    buf->map);
845 				m_freem(buf->m_head);
846 				buf->m_head = NULL;
847 				buf->map = NULL;
848 			}
849 			buf->eop_index = -1;
850 
851 			if (++first == que->num_desc)
852 				first = 0;
853 
854 			buf = &txr->buffers[first];
855 			tx_desc = &txr->base[first];
856 		}
857 		++txr->packets;
858 		/* See if there is more work now */
859 		last = buf->eop_index;
860 		if (last != -1) {
861 			eop_desc = &txr->base[last];
862 			/* Get next done point */
863 			if (++last == que->num_desc) last = 0;
864 			done = last;
865 		} else
866 			break;
867 	}
868 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
869 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
870 
871 	txr->next_to_clean = first;
872 
873 
874 	/*
875 	** Hang detection: we know there's work
876 	** outstanding or the first return above
877 	** would have been taken, so indicate an
878 	** unsuccessful pass; if the count grows
879 	** too large, the local timer will consider
880 	** the queue hung. If anything has been
881 	** cleaned then reset the state.
882 	*/
883 	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
884 		++que->busy;
885 
886 	if (processed)
887 		que->busy = 1; /* Note this turns off HUNG */
888 
889 	/*
890 	 * If there are no pending descriptors, clear the timeout.
891 	 */
892 	if (txr->avail == que->num_desc) {
893 		que->busy = 0;
894 		return FALSE;
895 	}
896 
897 	return TRUE;
898 }
899 
900 /*********************************************************************
901  *
902  *  Refresh mbuf buffers for RX descriptor rings
903  *   - now keeps its own state so discards due to resource
904  *     exhaustion are unnecessary; if an mbuf cannot be obtained
905  *     the routine just returns, keeping its placeholder, so it can
906  *     simply be called again later to retry.
907  *
908  **********************************************************************/
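/*
** Illustrative walk-through: with next_refresh at 100 and a limit
** of 108, and assuming every allocation succeeds, the loop below
** refills buffers 100 through 106, leaves next_refresh at 107 (one
** short of the limit), and finally bumps the hardware tail
** register to 107.
*/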
909 static void
910 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
911 {
912 	struct ixl_vsi		*vsi = que->vsi;
913 	struct rx_ring		*rxr = &que->rxr;
914 	bus_dma_segment_t	hseg[1];
915 	bus_dma_segment_t	pseg[1];
916 	struct ixl_rx_buf	*buf;
917 	struct mbuf		*mh, *mp;
918 	int			i, j, nsegs, error;
919 	bool			refreshed = FALSE;
920 
921 	i = j = rxr->next_refresh;
922 	/* Control the loop with one beyond */
923 	if (++j == que->num_desc)
924 		j = 0;
925 
926 	while (j != limit) {
927 		buf = &rxr->buffers[i];
928 		if (rxr->hdr_split == FALSE)
929 			goto no_split;
930 
931 		if (buf->m_head == NULL) {
932 			mh = m_gethdr(M_NOWAIT, MT_DATA);
933 			if (mh == NULL)
934 				goto update;
935 		} else
936 			mh = buf->m_head;
937 
938 		mh->m_pkthdr.len = mh->m_len = MHLEN;
940 		mh->m_flags |= M_PKTHDR;
941 		/* Get the memory mapping */
942 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
943 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
944 		if (error != 0) {
945 			printf("Refresh mbufs: hdr dmamap load"
946 			    " failure - %d\n", error);
947 			m_free(mh);
948 			buf->m_head = NULL;
949 			goto update;
950 		}
951 		buf->m_head = mh;
952 		bus_dmamap_sync(rxr->htag, buf->hmap,
953 		    BUS_DMASYNC_PREREAD);
954 		rxr->base[i].read.hdr_addr =
955 		   htole64(hseg[0].ds_addr);
956 
957 no_split:
958 		if (buf->m_pack == NULL) {
959 			mp = m_getjcl(M_NOWAIT, MT_DATA,
960 			    M_PKTHDR, rxr->mbuf_sz);
961 			if (mp == NULL)
962 				goto update;
963 		} else
964 			mp = buf->m_pack;
965 
966 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
967 		/* Get the memory mapping */
968 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
969 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
970 		if (error != 0) {
971 			printf("Refresh mbufs: payload dmamap load"
972 			    " failure - %d\n", error);
973 			m_free(mp);
974 			buf->m_pack = NULL;
975 			goto update;
976 		}
977 		buf->m_pack = mp;
978 		bus_dmamap_sync(rxr->ptag, buf->pmap,
979 		    BUS_DMASYNC_PREREAD);
980 		rxr->base[i].read.pkt_addr =
981 		   htole64(pseg[0].ds_addr);
982 		/* Used only when doing header split */
983 		rxr->base[i].read.hdr_addr = 0;
984 
985 		refreshed = TRUE;
986 		/* Next is precalculated */
987 		i = j;
988 		rxr->next_refresh = i;
989 		if (++j == que->num_desc)
990 			j = 0;
991 	}
992 update:
993 	if (refreshed) /* Update hardware tail index */
994 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
995 	return;
996 }
997 
998 
999 /*********************************************************************
1000  *
1001  *  Allocate memory for rx_buffer structures. Since we use one
1002  *  rx_buffer per descriptor, the maximum number of rx_buffers
1003  *  that we'll need is equal to the number of receive descriptors
1004  *  that we've defined.
1005  *
1006  **********************************************************************/
1007 int
1008 ixl_allocate_rx_data(struct ixl_queue *que)
1009 {
1010 	struct rx_ring		*rxr = &que->rxr;
1011 	struct ixl_vsi		*vsi = que->vsi;
1012 	device_t 		dev = vsi->dev;
1013 	struct ixl_rx_buf 	*buf;
1014 	int             	i, bsize, error;
1015 
1016 	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1017 	if (!(rxr->buffers =
1018 	    (struct ixl_rx_buf *) malloc(bsize,
1019 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1020 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1021 		error = ENOMEM;
1022 		return (error);
1023 	}
1024 
1025 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1026 				   1, 0,	/* alignment, bounds */
1027 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1028 				   BUS_SPACE_MAXADDR,	/* highaddr */
1029 				   NULL, NULL,		/* filter, filterarg */
1030 				   MSIZE,		/* maxsize */
1031 				   1,			/* nsegments */
1032 				   MSIZE,		/* maxsegsize */
1033 				   0,			/* flags */
1034 				   NULL,		/* lockfunc */
1035 				   NULL,		/* lockfuncarg */
1036 				   &rxr->htag))) {
1037 		device_printf(dev, "Unable to create RX DMA htag\n");
1038 		return (error);
1039 	}
1040 
1041 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1042 				   1, 0,	/* alignment, bounds */
1043 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1044 				   BUS_SPACE_MAXADDR,	/* highaddr */
1045 				   NULL, NULL,		/* filter, filterarg */
1046 				   MJUM16BYTES,		/* maxsize */
1047 				   1,			/* nsegments */
1048 				   MJUM16BYTES,		/* maxsegsize */
1049 				   0,			/* flags */
1050 				   NULL,		/* lockfunc */
1051 				   NULL,		/* lockfuncarg */
1052 				   &rxr->ptag))) {
1053 		device_printf(dev, "Unable to create RX DMA ptag\n");
1054 		return (error);
1055 	}
1056 
1057 	for (i = 0; i < que->num_desc; i++) {
1058 		buf = &rxr->buffers[i];
1059 		error = bus_dmamap_create(rxr->htag,
1060 		    BUS_DMA_NOWAIT, &buf->hmap);
1061 		if (error) {
1062 			device_printf(dev, "Unable to create RX head map\n");
1063 			break;
1064 		}
1065 		error = bus_dmamap_create(rxr->ptag,
1066 		    BUS_DMA_NOWAIT, &buf->pmap);
1067 		if (error) {
1068 			device_printf(dev, "Unable to create RX pkt map\n");
1069 			break;
1070 		}
1071 	}
1072 
1073 	return (error);
1074 }
1075 
1076 
1077 /*********************************************************************
1078  *
1079  *  (Re)Initialize the queue receive ring and its buffers.
1080  *
1081  **********************************************************************/
1082 int
1083 ixl_init_rx_ring(struct ixl_queue *que)
1084 {
1085 	struct	rx_ring 	*rxr = &que->rxr;
1086 #if defined(INET6) || defined(INET)
1087 	struct ixl_vsi		*vsi = que->vsi;
1088 	struct ifnet		*ifp = vsi->ifp;
1089 	struct lro_ctrl		*lro = &rxr->lro;
1090 #endif
1091 	struct ixl_rx_buf	*buf;
1092 	bus_dma_segment_t	pseg[1], hseg[1];
1093 	int			rsize, nsegs, error = 0;
1094 
1095 	IXL_RX_LOCK(rxr);
1096 	/* Clear the ring contents */
1097 	rsize = roundup2(que->num_desc *
1098 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1099 	bzero((void *)rxr->base, rsize);
1100 	/* Cleanup any existing buffers */
1101 	for (int i = 0; i < que->num_desc; i++) {
1102 		buf = &rxr->buffers[i];
1103 		if (buf->m_head != NULL) {
1104 			bus_dmamap_sync(rxr->htag, buf->hmap,
1105 			    BUS_DMASYNC_POSTREAD);
1106 			bus_dmamap_unload(rxr->htag, buf->hmap);
1107 			buf->m_head->m_flags |= M_PKTHDR;
1108 			m_freem(buf->m_head);
1109 		}
1110 		if (buf->m_pack != NULL) {
1111 			bus_dmamap_sync(rxr->ptag, buf->pmap,
1112 			    BUS_DMASYNC_POSTREAD);
1113 			bus_dmamap_unload(rxr->ptag, buf->pmap);
1114 			buf->m_pack->m_flags |= M_PKTHDR;
1115 			m_freem(buf->m_pack);
1116 		}
1117 		buf->m_head = NULL;
1118 		buf->m_pack = NULL;
1119 	}
1120 
1121 	/* header split is off */
1122 	rxr->hdr_split = FALSE;
1123 
1124 	/* Now replenish the mbufs */
1125 	for (int j = 0; j != que->num_desc; ++j) {
1126 		struct mbuf	*mh, *mp;
1127 
1128 		buf = &rxr->buffers[j];
1129 		/*
1130 		** Don't allocate mbufs if not
1131 		** doing header split, it's wasteful
1132 		*/
1133 		if (rxr->hdr_split == FALSE)
1134 			goto skip_head;
1135 
1136 		/* First the header */
1137 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1138 		if (buf->m_head == NULL) {
1139 			error = ENOBUFS;
1140 			goto fail;
1141 		}
1142 		m_adj(buf->m_head, ETHER_ALIGN);
1143 		mh = buf->m_head;
1144 		mh->m_len = mh->m_pkthdr.len = MHLEN;
1145 		mh->m_flags |= M_PKTHDR;
1146 		/* Get the memory mapping */
1147 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1148 		    buf->hmap, buf->m_head, hseg,
1149 		    &nsegs, BUS_DMA_NOWAIT);
1150 		if (error != 0) /* Nothing elegant to do here */
1151 			goto fail;
1152 		bus_dmamap_sync(rxr->htag,
1153 		    buf->hmap, BUS_DMASYNC_PREREAD);
1154 		/* Update descriptor */
1155 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1156 
1157 skip_head:
1158 		/* Now the payload cluster */
1159 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1160 		    M_PKTHDR, rxr->mbuf_sz);
1161 		if (buf->m_pack == NULL) {
1162 			error = ENOBUFS;
1163                         goto fail;
1164 		}
1165 		mp = buf->m_pack;
1166 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1167 		/* Get the memory mapping */
1168 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1169 		    buf->pmap, mp, pseg,
1170 		    &nsegs, BUS_DMA_NOWAIT);
1171 		if (error != 0)
1172                         goto fail;
1173 		bus_dmamap_sync(rxr->ptag,
1174 		    buf->pmap, BUS_DMASYNC_PREREAD);
1175 		/* Update descriptor */
1176 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1177 		rxr->base[j].read.hdr_addr = 0;
1178 	}
1179 
1180 
1181 	/* Setup our descriptor indices */
1182 	rxr->next_check = 0;
1183 	rxr->next_refresh = 0;
1184 	rxr->lro_enabled = FALSE;
1185 	rxr->split = 0;
1186 	rxr->bytes = 0;
1187 	rxr->discard = FALSE;
1188 
1189 #if defined(INET6) || defined(INET)
1190 	/*
1191 	** Now set up the LRO interface:
1192 	*/
1193 	if (ifp->if_capenable & IFCAP_LRO) {
1194 		int err = tcp_lro_init(lro);
1195 		if (err) {
1196 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1197 			goto fail;
1198 		}
1199 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1200 		rxr->lro_enabled = TRUE;
1201 		lro->ifp = vsi->ifp;
1202 	}
1203 #endif
1204 
1205 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1206 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1207 
1208 fail:
1209 	IXL_RX_UNLOCK(rxr);
1210 	return (error);
1211 }
1212 
1213 
1214 /*********************************************************************
1215  *
1216  *  Free station receive ring data structures
1217  *
1218  **********************************************************************/
1219 void
1220 ixl_free_que_rx(struct ixl_queue *que)
1221 {
1222 	struct rx_ring		*rxr = &que->rxr;
1223 	struct ixl_rx_buf	*buf;
1224 
1225 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1226 
1227 	/* Cleanup any existing buffers */
1228 	if (rxr->buffers != NULL) {
1229 		for (int i = 0; i < que->num_desc; i++) {
1230 			buf = &rxr->buffers[i];
1231 			if (buf->m_head != NULL) {
1232 				bus_dmamap_sync(rxr->htag, buf->hmap,
1233 				    BUS_DMASYNC_POSTREAD);
1234 				bus_dmamap_unload(rxr->htag, buf->hmap);
1235 				buf->m_head->m_flags |= M_PKTHDR;
1236 				m_freem(buf->m_head);
1237 			}
1238 			if (buf->m_pack != NULL) {
1239 				bus_dmamap_sync(rxr->ptag, buf->pmap,
1240 				    BUS_DMASYNC_POSTREAD);
1241 				bus_dmamap_unload(rxr->ptag, buf->pmap);
1242 				buf->m_pack->m_flags |= M_PKTHDR;
1243 				m_freem(buf->m_pack);
1244 			}
1245 			buf->m_head = NULL;
1246 			buf->m_pack = NULL;
1247 			if (buf->hmap != NULL) {
1248 				bus_dmamap_destroy(rxr->htag, buf->hmap);
1249 				buf->hmap = NULL;
1250 			}
1251 			if (buf->pmap != NULL) {
1252 				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1253 				buf->pmap = NULL;
1254 			}
1255 		}
1256 		if (rxr->buffers != NULL) {
1257 			free(rxr->buffers, M_DEVBUF);
1258 			rxr->buffers = NULL;
1259 		}
1260 	}
1261 
1262 	if (rxr->htag != NULL) {
1263 		bus_dma_tag_destroy(rxr->htag);
1264 		rxr->htag = NULL;
1265 	}
1266 	if (rxr->ptag != NULL) {
1267 		bus_dma_tag_destroy(rxr->ptag);
1268 		rxr->ptag = NULL;
1269 	}
1270 
1271 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1272 	return;
1273 }
1274 
1275 static __inline void
1276 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1277 {
1278 
1279 #if defined(INET6) || defined(INET)
1280         /*
1281          * At the moment LRO is done only for IPv4/TCP packets whose TCP
1282          * checksum has been computed by hardware, and only when the frame
1283          * carries no VLAN tag in its Ethernet header.
1284          */
1285         if (rxr->lro_enabled &&
1286             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1287             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1288             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1289                 /*
1290                  * Send to the stack if:
1291                  **  - LRO not enabled, or
1292                  **  - no LRO resources, or
1293                  **  - lro enqueue fails
1294                  */
1295                 if (rxr->lro.lro_cnt != 0)
1296                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1297                                 return;
1298         }
1299 #endif
1300 	IXL_RX_UNLOCK(rxr);
1301         (*ifp->if_input)(ifp, m);
1302 	IXL_RX_LOCK(rxr);
1303 }
1304 
1305 
1306 static __inline void
1307 ixl_rx_discard(struct rx_ring *rxr, int i)
1308 {
1309 	struct ixl_rx_buf	*rbuf;
1310 
1311 	rbuf = &rxr->buffers[i];
1312 
1313         if (rbuf->fmp != NULL) {/* Partial chain ? */
1314 		rbuf->fmp->m_flags |= M_PKTHDR;
1315                 m_freem(rbuf->fmp);
1316                 rbuf->fmp = NULL;
1317 	}
1318 
1319 	/*
1320 	** With advanced descriptors the writeback
1321 	** clobbers the buffer addrs, so it's easier
1322 	** to just free the existing mbufs and take
1323 	** the normal refresh path to get new buffers
1324 	** and mapping.
1325 	*/
1326 	if (rbuf->m_head) {
1327 		m_free(rbuf->m_head);
1328 		rbuf->m_head = NULL;
1329 	}
1330 
1331 	if (rbuf->m_pack) {
1332 		m_free(rbuf->m_pack);
1333 		rbuf->m_pack = NULL;
1334 	}
1335 
1336 	return;
1337 }
1338 
1339 
1340 /*********************************************************************
1341  *
1342  *  This routine executes in interrupt context. It replenishes
1343  *  the mbufs in the descriptor ring and passes data which has been
1344  *  DMA'd into host memory up to the upper layer.
1345  *
1346  *  We loop at most count times if count is > 0, or until done if
1347  *  count < 0.
1348  *
1349  *  Return TRUE for more work, FALSE for all clean.
1350  *********************************************************************/
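/*
** Descriptor parsing sketch (illustrative): the writeback qword1 of
** each completed descriptor packs the DD/EOP status bits, the error
** bits, the packet type and the buffer lengths; the loop below
** unpacks them with the I40E_RXD_QW1_* masks and only consumes a
** descriptor once its DD bit is set.
*/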
1351 bool
1352 ixl_rxeof(struct ixl_queue *que, int count)
1353 {
1354 	struct ixl_vsi		*vsi = que->vsi;
1355 	struct rx_ring		*rxr = &que->rxr;
1356 	struct ifnet		*ifp = vsi->ifp;
1357 #if defined(INET6) || defined(INET)
1358 	struct lro_ctrl		*lro = &rxr->lro;
1359 	struct lro_entry	*queued;
1360 #endif
1361 	int			i, nextp, processed = 0;
1362 	union i40e_rx_desc	*cur;
1363 	struct ixl_rx_buf	*rbuf, *nbuf;
1364 
1365 
1366 	IXL_RX_LOCK(rxr);
1367 
1368 	for (i = rxr->next_check; count != 0;) {
1369 		struct mbuf	*sendmp, *mh, *mp;
1370 		u32		rsc, status, error;
1371 		u16		hlen, plen, vtag;
1372 		u64		qword;
1373 		u8		ptype;
1374 		bool		eop;
1375 
1376 		/* Sync the ring. */
1377 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1378 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1379 
1380 		cur = &rxr->base[i];
1381 		qword = le64toh(cur->wb.qword1.status_error_len);
1382 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1383 		    >> I40E_RXD_QW1_STATUS_SHIFT;
1384 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1385 		    >> I40E_RXD_QW1_ERROR_SHIFT;
1386 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1387 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1388 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1389 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1390 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1391 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1392 
1393 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1394 			++rxr->not_done;
1395 			break;
1396 		}
1397 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1398 			break;
1399 
1400 		count--;
1401 		sendmp = NULL;
1402 		nbuf = NULL;
1403 		rsc = 0;
1404 		cur->wb.qword1.status_error_len = 0;
1405 		rbuf = &rxr->buffers[i];
1406 		mh = rbuf->m_head;
1407 		mp = rbuf->m_pack;
1408 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1409 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1410 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1411 		else
1412 			vtag = 0;
1413 
1414 		/*
1415 		** Make sure bad packets are discarded;
1416 		** note that only the EOP descriptor has valid
1417 		** error results.
1418 		*/
1419                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1420 			rxr->discarded++;
1421 			ixl_rx_discard(rxr, i);
1422 			goto next_desc;
1423 		}
1424 
1425 		/* Prefetch the next buffer */
1426 		if (!eop) {
1427 			nextp = i + 1;
1428 			if (nextp == que->num_desc)
1429 				nextp = 0;
1430 			nbuf = &rxr->buffers[nextp];
1431 			prefetch(nbuf);
1432 		}
1433 
1434 		/*
1435 		** The header mbuf is ONLY used when header
1436 		** split is enabled, otherwise we get normal
1437 		** behavior, ie, both header and payload
1438 		** are DMA'd into the payload buffer.
1439 		**
1440 		** Rather than using the fmp/lmp global pointers
1441 		** we now keep the head of a packet chain in the
1442 		** buffer struct and pass this along from one
1443 		** descriptor to the next, until we get EOP.
1444 		*/
1445 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1446 			if (hlen > IXL_RX_HDR)
1447 				hlen = IXL_RX_HDR;
1448 			mh->m_len = hlen;
1449 			mh->m_flags |= M_PKTHDR;
1450 			mh->m_next = NULL;
1451 			mh->m_pkthdr.len = mh->m_len;
1452 			/* Null buf pointer so it is refreshed */
1453 			rbuf->m_head = NULL;
1454 			/*
1455 			** Check the payload length; this
1456 			** could be zero if it's a small
1457 			** packet.
1458 			*/
1459 			if (plen > 0) {
1460 				mp->m_len = plen;
1461 				mp->m_next = NULL;
1462 				mp->m_flags &= ~M_PKTHDR;
1463 				mh->m_next = mp;
1464 				mh->m_pkthdr.len += mp->m_len;
1465 				/* Null buf pointer so it is refreshed */
1466 				rbuf->m_pack = NULL;
1467 				rxr->split++;
1468 			}
1469 			/*
1470 			** Now create the forward
1471 			** chain so when complete
1472 			** we won't have to.
1473 			*/
1474                         if (eop == 0) {
1475 				/* stash the chain head */
1476                                 nbuf->fmp = mh;
1477 				/* Make forward chain */
1478                                 if (plen)
1479                                         mp->m_next = nbuf->m_pack;
1480                                 else
1481                                         mh->m_next = nbuf->m_pack;
1482                         } else {
1483 				/* Singlet, prepare to send */
1484                                 sendmp = mh;
1485                                 if (vtag) {
1486                                         sendmp->m_pkthdr.ether_vtag = vtag;
1487                                         sendmp->m_flags |= M_VLANTAG;
1488                                 }
1489                         }
1490 		} else {
1491 			/*
1492 			** Either no header split, or a
1493 			** secondary piece of a fragmented
1494 			** split packet.
1495 			*/
1496 			mp->m_len = plen;
1497 			/*
1498 			** See if there is a stored head
1499 			** that determines what we are
1500 			*/
1501 			sendmp = rbuf->fmp;
1502 			rbuf->m_pack = rbuf->fmp = NULL;
1503 
1504 			if (sendmp != NULL) /* secondary frag */
1505 				sendmp->m_pkthdr.len += mp->m_len;
1506 			else {
1507 				/* first desc of a non-ps chain */
1508 				sendmp = mp;
1509 				sendmp->m_flags |= M_PKTHDR;
1510 				sendmp->m_pkthdr.len = mp->m_len;
1511 				if (vtag) {
1512 					sendmp->m_pkthdr.ether_vtag = vtag;
1513 					sendmp->m_flags |= M_VLANTAG;
1514 				}
1515                         }
1516 			/* Pass the head pointer on */
1517 			if (eop == 0) {
1518 				nbuf->fmp = sendmp;
1519 				sendmp = NULL;
1520 				mp->m_next = nbuf->m_pack;
1521 			}
1522 		}
1523 		++processed;
1524 		/* Sending this frame? */
1525 		if (eop) {
1526 			sendmp->m_pkthdr.rcvif = ifp;
1527 			/* gather stats */
1528 			rxr->rx_packets++;
1529 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1530 			/* capture data for dynamic ITR adjustment */
1531 			rxr->packets++;
1532 			rxr->bytes += sendmp->m_pkthdr.len;
1533 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1534 				ixl_rx_checksum(sendmp, status, error, ptype);
1535 			sendmp->m_pkthdr.flowid = que->msix;
1536 			sendmp->m_flags |= M_FLOWID;
1537 		}
1538 next_desc:
1539 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1540 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1541 
1542 		/* Advance our pointers to the next descriptor. */
1543 		if (++i == que->num_desc)
1544 			i = 0;
1545 
1546 		/* Now send to the stack or do LRO */
1547 		if (sendmp != NULL) {
1548 			rxr->next_check = i;
1549 			ixl_rx_input(rxr, ifp, sendmp, ptype);
1550 			i = rxr->next_check;
1551 		}
1552 
1553                /* Every 8 descriptors we go to refresh mbufs */
1554 		if (processed == 8) {
1555 			ixl_refresh_mbufs(que, i);
1556 			processed = 0;
1557 		}
1558 	}
1559 
1560 	/* Refresh any remaining buf structs */
1561 	if (ixl_rx_unrefreshed(que))
1562 		ixl_refresh_mbufs(que, i);
1563 
1564 	rxr->next_check = i;
1565 
1566 #if defined(INET6) || defined(INET)
1567 	/*
1568 	 * Flush any outstanding LRO work
1569 	 */
1570 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1571 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1572 		tcp_lro_flush(lro, queued);
1573 	}
1574 #endif
1575 
1576 	IXL_RX_UNLOCK(rxr);
1577 	return (FALSE);
1578 }
1579 
1580 
1581 /*********************************************************************
1582  *
1583  *  Verify that the hardware indicated that the checksum is valid.
1584  *  Inform the stack about the status of checksum so that stack
1585  *  doesn't spend time verifying the checksum.
1586  *
1587  *********************************************************************/
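/*
** Illustrative mapping: a good L3 checksum is reported to the stack
** as CSUM_IP_CHECKED | CSUM_IP_VALID, and when the L3L4P status bit
** shows the L4 checksum was verified as well, CSUM_DATA_VALID |
** CSUM_PSEUDO_HDR plus a csum_data of 0xffff are set so the stack
** can skip its own check.
*/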
1588 static void
1589 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1590 {
1591 	struct i40e_rx_ptype_decoded decoded;
1592 
1593 	decoded = decode_rx_desc_ptype(ptype);
1594 
1595 	/* Errors? */
1596  	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1597 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1598 		mp->m_pkthdr.csum_flags = 0;
1599 		return;
1600 	}
1601 
1602 	/* IPv6 with extension headers likely have bad csum */
1603 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1604 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1605 		if (status &
1606 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1607 			mp->m_pkthdr.csum_flags = 0;
1608 			return;
1609 		}
1610 
1611 
1612 	/* IP Checksum Good */
1613 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1614 	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1615 
1616 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1617 		mp->m_pkthdr.csum_flags |=
1618 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1619 		mp->m_pkthdr.csum_data |= htons(0xffff);
1620 	}
1621 	return;
1622 }
1623 
1624 #if __FreeBSD_version >= 1100000
1625 uint64_t
1626 ixl_get_counter(if_t ifp, ift_counter cnt)
1627 {
1628 	struct ixl_vsi *vsi;
1629 
1630 	vsi = if_getsoftc(ifp);
1631 
1632 	switch (cnt) {
1633 	case IFCOUNTER_IPACKETS:
1634 		return (vsi->ipackets);
1635 	case IFCOUNTER_IERRORS:
1636 		return (vsi->ierrors);
1637 	case IFCOUNTER_OPACKETS:
1638 		return (vsi->opackets);
1639 	case IFCOUNTER_OERRORS:
1640 		return (vsi->oerrors);
1641 	case IFCOUNTER_COLLISIONS:
1642 		/* Collisions are by standard impossible in 40G/10G Ethernet */
1643 		return (0);
1644 	case IFCOUNTER_IBYTES:
1645 		return (vsi->ibytes);
1646 	case IFCOUNTER_OBYTES:
1647 		return (vsi->obytes);
1648 	case IFCOUNTER_IMCASTS:
1649 		return (vsi->imcasts);
1650 	case IFCOUNTER_OMCASTS:
1651 		return (vsi->omcasts);
1652 	case IFCOUNTER_IQDROPS:
1653 		return (vsi->iqdrops);
1654 	case IFCOUNTER_OQDROPS:
1655 		return (vsi->oqdrops);
1656 	case IFCOUNTER_NOPROTO:
1657 		return (vsi->noproto);
1658 	default:
1659 		return (if_get_counter_default(ifp, cnt));
1660 	}
1661 }
1662 #endif
1663