xref: /freebsd/sys/dev/ixl/ixl_txrx.c (revision 6574b8ed19b093f0af09501d2c9676c28993cb97)
1 /******************************************************************************
2 
3   Copyright (c) 2013-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 /*
36 **	IXL driver TX/RX Routines:
37 **	    This was separated to allow usage by
38 ** 	    both the BASE and the VF drivers.
39 */
40 
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "ixl.h"
44 
45 /* Local Prototypes */
46 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
48 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49 static int	ixl_tx_setup_offload(struct ixl_queue *,
50 		    struct mbuf *, u32 *, u32 *);
51 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52 
53 static __inline void ixl_rx_discard(struct rx_ring *, int);
54 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55 		    struct mbuf *, u8);
56 
57 /*
58 ** Multiqueue Transmit driver
59 **
60 */
61 int
62 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63 {
64 	struct ixl_vsi		*vsi = ifp->if_softc;
65 	struct ixl_queue	*que;
66 	struct tx_ring		*txr;
67 	int 			err, i;
68 
69 	/* Which queue to use */
70 	if ((m->m_flags & M_FLOWID) != 0)
71 		i = m->m_pkthdr.flowid % vsi->num_queues;
72 	else
73 		i = curcpu % vsi->num_queues;
74 
75 	/* Check for a hung queue and pick alternative */
76 	if (((1 << i) & vsi->active_queues) == 0)
77 		i = ffsl(vsi->active_queues);
78 
79 	que = &vsi->queues[i];
80 	txr = &que->txr;
81 
82 	err = drbr_enqueue(ifp, txr->br, m);
83 	if (err)
84 		return(err);
85 	if (IXL_TX_TRYLOCK(txr)) {
86 		ixl_mq_start_locked(ifp, txr);
87 		IXL_TX_UNLOCK(txr);
88 	} else
89 		taskqueue_enqueue(que->tq, &que->tx_task);
90 
91 	return (0);
92 }
93 
94 int
95 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96 {
97 	struct ixl_queue	*que = txr->que;
98 	struct ixl_vsi		*vsi = que->vsi;
99         struct mbuf		*next;
100         int			err = 0;
101 
102 
103 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104 	    vsi->link_active == 0)
105 		return (ENETDOWN);
106 
107 	/* Process the transmit queue */
108 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109 		if ((err = ixl_xmit(que, &next)) != 0) {
110 			if (next == NULL)
111 				drbr_advance(ifp, txr->br);
112 			else
113 				drbr_putback(ifp, txr->br, next);
114 			break;
115 		}
116 		drbr_advance(ifp, txr->br);
117 		/* Send a copy of the frame to the BPF listener */
118 		ETHER_BPF_MTAP(ifp, next);
119 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120 			break;
121 	}
122 
123 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124 		ixl_txeof(que);
125 
126 	return (err);
127 }
128 
129 /*
130  * Called from a taskqueue to drain queued transmit packets.
131  */
132 void
133 ixl_deferred_mq_start(void *arg, int pending)
134 {
135 	struct ixl_queue	*que = arg;
136         struct tx_ring		*txr = &que->txr;
137 	struct ixl_vsi		*vsi = que->vsi;
138         struct ifnet		*ifp = vsi->ifp;
139 
140 	IXL_TX_LOCK(txr);
141 	if (!drbr_empty(ifp, txr->br))
142 		ixl_mq_start_locked(ifp, txr);
143 	IXL_TX_UNLOCK(txr);
144 }
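
/*
** Illustrative sketch (not part of this file): ixl_mq_start() and
** ixl_deferred_mq_start() assume the attach path has wired a per-queue
** task and fast taskqueue roughly as below; the exact names and flags
** live in the PF/VF attach code, so treat this only as an outline.
*/
#if 0	/* example only */
	TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
	que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
	    device_get_nameunit(que->vsi->dev));
#endif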
145 
146 /*
147 ** Flush all queue ring buffers
148 */
149 void
150 ixl_qflush(struct ifnet *ifp)
151 {
152 	struct ixl_vsi	*vsi = ifp->if_softc;
153 
154         for (int i = 0; i < vsi->num_queues; i++) {
155 		struct ixl_queue *que = &vsi->queues[i];
156 		struct tx_ring	*txr = &que->txr;
157 		struct mbuf	*m;
158 		IXL_TX_LOCK(txr);
159 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160 			m_freem(m);
161 		IXL_TX_UNLOCK(txr);
162 	}
163 	if_qflush(ifp);
164 }
165 
166 /*
167 ** Find mbuf chains passed to the driver
168 ** that are 'sparse', using more than 8
169 ** mbufs to deliver an mss-size chunk of data
170 */
171 static inline bool
172 ixl_tso_detect_sparse(struct mbuf *mp)
173 {
174 	struct mbuf	*m;
175 	int		num = 0, mss;
176 	bool		ret = FALSE;
177 
178 	mss = mp->m_pkthdr.tso_segsz;
179 	for (m = mp->m_next; m != NULL; m = m->m_next) {
180 		num++;
181 		mss -= m->m_len;
182 		if (mss < 1)
183 			break;
184 		if (m->m_next == NULL)
185 			break;
186 	}
187 	if (num > IXL_SPARSE_CHAIN)
188 		ret = TRUE;
189 
190 	return (ret);
191 }
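
/*
** Worked example (illustrative only): with tso_segsz (MSS) = 1448 and a
** payload chain of many tiny mbufs, say 100 bytes each, more than
** IXL_SPARSE_CHAIN buffers are walked before one MSS worth of data is
** covered, so the chain is flagged as sparse and ixl_xmit() runs it
** through m_defrag() before DMA mapping.  A chain of 2KB clusters
** covers an MSS within a couple of mbufs and is left untouched.
*/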
192 
193 
194 /*********************************************************************
195  *
196  *  This routine maps the mbufs to tx descriptors, allowing the
197  *  TX engine to transmit the packets.
198  *  	- return 0 on success, positive on failure
199  *
200  **********************************************************************/
201 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
202 
203 static int
204 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205 {
206 	struct ixl_vsi		*vsi = que->vsi;
207 	struct i40e_hw		*hw = vsi->hw;
208 	struct tx_ring		*txr = &que->txr;
209 	struct ixl_tx_buf	*buf;
210 	struct i40e_tx_desc	*txd = NULL;
211 	struct mbuf		*m_head, *m;
212 	int             	i, j, error, nsegs, maxsegs;
213 	int			first, last = 0;
214 	u16			vtag = 0;
215 	u32			cmd, off;
216 	bus_dmamap_t		map;
217 	bus_dma_tag_t		tag;
218 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
219 
220 
221 	cmd = off = 0;
222 	m_head = *m_headp;
223 
224         /*
225          * Important to capture the first descriptor
226          * used because it will contain the index of
227          * the one we tell the hardware to report back
228          */
229         first = txr->next_avail;
230 	buf = &txr->buffers[first];
231 	map = buf->map;
232 	tag = txr->tx_tag;
233 	maxsegs = IXL_MAX_TX_SEGS;
234 
235 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236 		/* Use larger mapping for TSO */
237 		tag = txr->tso_tag;
238 		maxsegs = IXL_MAX_TSO_SEGS;
239 		if (ixl_tso_detect_sparse(m_head)) {
240 			m = m_defrag(m_head, M_NOWAIT);
			/* Defrag can fail; drop the packet rather than */
			/* hand a NULL chain to the DMA load below. */
			if (m == NULL) {
				que->mbuf_defrag_failed++;
				m_freem(m_head);
				*m_headp = NULL;
				return (ENOBUFS);
			}
241 			*m_headp = m;
242 		}
243 	}
244 
245 	/*
246 	 * Map the packet for DMA.
247 	 */
248 	error = bus_dmamap_load_mbuf_sg(tag, map,
249 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
250 
251 	if (error == EFBIG) {
252 		struct mbuf *m;
253 
254 		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
255 		if (m == NULL) {
256 			que->mbuf_defrag_failed++;
257 			m_freem(*m_headp);
258 			*m_headp = NULL;
259 			return (ENOBUFS);
260 		}
261 		*m_headp = m;
262 
263 		/* Try it again */
264 		error = bus_dmamap_load_mbuf_sg(tag, map,
265 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
266 
267 		if (error == ENOMEM) {
268 			que->tx_dma_setup++;
269 			return (error);
270 		} else if (error != 0) {
271 			que->tx_dma_setup++;
272 			m_freem(*m_headp);
273 			*m_headp = NULL;
274 			return (error);
275 		}
276 	} else if (error == ENOMEM) {
277 		que->tx_dma_setup++;
278 		return (error);
279 	} else if (error != 0) {
280 		que->tx_dma_setup++;
281 		m_freem(*m_headp);
282 		*m_headp = NULL;
283 		return (error);
284 	}
285 
286 	/* Make certain there are enough descriptors */
287 	if (nsegs > txr->avail - 2) {
288 		txr->no_desc++;
289 		error = ENOBUFS;
290 		goto xmit_fail;
291 	}
292 	m_head = *m_headp;
293 
294 	/* Set up the TSO/CSUM offload */
295 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
296 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
297 		if (error)
298 			goto xmit_fail;
299 	}
300 
301 	cmd |= I40E_TX_DESC_CMD_ICRC;
302 	/* Grab the VLAN tag */
303 	if (m_head->m_flags & M_VLANTAG) {
304 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
305 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
306 	}
307 
308 	i = txr->next_avail;
309 	for (j = 0; j < nsegs; j++) {
310 		bus_size_t seglen;
311 
312 		buf = &txr->buffers[i];
313 		buf->tag = tag; /* Keep track of the type tag */
314 		txd = &txr->base[i];
315 		seglen = segs[j].ds_len;
316 
317 		txd->buffer_addr = htole64(segs[j].ds_addr);
318 		txd->cmd_type_offset_bsz =
319 		    htole64(I40E_TX_DESC_DTYPE_DATA
320 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
321 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
322 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
323 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
324 
325 		last = i; /* descriptor that will get completion IRQ */
326 
327 		if (++i == que->num_desc)
328 			i = 0;
329 
330 		buf->m_head = NULL;
331 		buf->eop_index = -1;
332 	}
333 	/* Set the last descriptor for report */
334 	txd->cmd_type_offset_bsz |=
335 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
336 	txr->avail -= nsegs;
337 	txr->next_avail = i;
338 
339 	buf->m_head = m_head;
340 	/* Swap the dma map between the first and last descriptor */
341 	txr->buffers[first].map = buf->map;
342 	buf->map = map;
343 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
344 
345         /* Set the index of the descriptor that will be marked done */
346         buf = &txr->buffers[first];
347 	buf->eop_index = last;
348 
349         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
350             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
351 	/*
352 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
353 	 * hardware that this frame is available to transmit.
354 	 */
355 	++txr->total_packets;
356 	wr32(hw, txr->tail, i);
357 
358 	ixl_flush(hw);
359 	/* Mark outstanding work */
360 	if (que->busy == 0)
361 		que->busy = 1;
362 	return (0);
363 
364 xmit_fail:
365 	bus_dmamap_unload(tag, buf->map);
366 	return (error);
367 }
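
/*
** Descriptor packing example (illustrative only, using the same macros
** as ixl_xmit() above): for a single-segment 1514-byte frame with a
** VLAN tag, 'cmd' carries ICRC plus any L4 type bits from
** ixl_tx_setup_offload(), 'off' carries the header lengths, and the
** last (here only) descriptor also ORs in IXL_TXD_CMD (EOP | RS):
*/
#if 0	/* example only */
	u64 qw1 = I40E_TX_DESC_DTYPE_DATA
	    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
	    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
	    | ((u64)1514 << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
	    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT);
	txd->cmd_type_offset_bsz = htole64(qw1
	    | ((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
#endif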
368 
369 
370 /*********************************************************************
371  *
372  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
373  *  the information needed to transmit a packet on the wire. This is
374  *  called only once at attach, setup is done every reset.
375  *
376  **********************************************************************/
377 int
378 ixl_allocate_tx_data(struct ixl_queue *que)
379 {
380 	struct tx_ring		*txr = &que->txr;
381 	struct ixl_vsi		*vsi = que->vsi;
382 	device_t		dev = vsi->dev;
383 	struct ixl_tx_buf	*buf;
384 	int			error = 0;
385 
386 	/*
387 	 * Setup DMA descriptor areas.
388 	 */
389 	if ((error = bus_dma_tag_create(NULL,		/* parent */
390 			       1, 0,			/* alignment, bounds */
391 			       BUS_SPACE_MAXADDR,	/* lowaddr */
392 			       BUS_SPACE_MAXADDR,	/* highaddr */
393 			       NULL, NULL,		/* filter, filterarg */
394 			       IXL_TSO_SIZE,		/* maxsize */
395 			       IXL_MAX_TX_SEGS,		/* nsegments */
396 			       PAGE_SIZE,		/* maxsegsize */
397 			       0,			/* flags */
398 			       NULL,			/* lockfunc */
399 			       NULL,			/* lockfuncarg */
400 			       &txr->tx_tag))) {
401 		device_printf(dev,"Unable to allocate TX DMA tag\n");
402 		goto fail;
403 	}
404 
405 	/* Make a special tag for TSO */
406 	if ((error = bus_dma_tag_create(NULL,		/* parent */
407 			       1, 0,			/* alignment, bounds */
408 			       BUS_SPACE_MAXADDR,	/* lowaddr */
409 			       BUS_SPACE_MAXADDR,	/* highaddr */
410 			       NULL, NULL,		/* filter, filterarg */
411 			       IXL_TSO_SIZE,		/* maxsize */
412 			       IXL_MAX_TSO_SEGS,	/* nsegments */
413 			       PAGE_SIZE,		/* maxsegsize */
414 			       0,			/* flags */
415 			       NULL,			/* lockfunc */
416 			       NULL,			/* lockfuncarg */
417 			       &txr->tso_tag))) {
418 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
419 		goto fail;
420 	}
421 
422 	if (!(txr->buffers =
423 	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
424 	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
425 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
426 		error = ENOMEM;
427 		goto fail;
428 	}
429 
430         /* Create the descriptor buffer default dma maps */
431 	buf = txr->buffers;
432 	for (int i = 0; i < que->num_desc; i++, buf++) {
433 		buf->tag = txr->tx_tag;
434 		error = bus_dmamap_create(buf->tag, 0, &buf->map);
435 		if (error != 0) {
436 			device_printf(dev, "Unable to create TX DMA map\n");
437 			goto fail;
438 		}
439 	}
440 fail:
441 	return (error);
442 }
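
/*
** DMA object lifecycle (summary of this file, for reference): the tags
** and per-buffer maps created above are used by ixl_xmit() via
** bus_dmamap_load_mbuf_sg() and bus_dmamap_sync(PREWRITE), torn down in
** ixl_txeof() with bus_dmamap_sync(POSTWRITE) plus bus_dmamap_unload(),
** and finally released in ixl_free_que_tx() with bus_dmamap_destroy()
** and bus_dma_tag_destroy().
*/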
443 
444 
445 /*********************************************************************
446  *
447  *  (Re)Initialize a queue transmit ring.
448  *	- called by init, it clears the descriptor ring,
449  *	  and frees any stale mbufs
450  *
451  **********************************************************************/
452 void
453 ixl_init_tx_ring(struct ixl_queue *que)
454 {
455 	struct tx_ring *txr = &que->txr;
456 	struct ixl_tx_buf *buf;
457 
458 	/* Clear the old ring contents */
459 	IXL_TX_LOCK(txr);
460 	bzero((void *)txr->base,
461 	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
462 
463 	/* Reset indices */
464 	txr->next_avail = 0;
465 	txr->next_to_clean = 0;
466 
467 #ifdef IXL_FDIR
468 	/* Initialize flow director */
469 	txr->atr_rate = ixl_atr_rate;
470 	txr->atr_count = 0;
471 #endif
472 
473 	/* Free any existing tx mbufs. */
474         buf = txr->buffers;
475 	for (int i = 0; i < que->num_desc; i++, buf++) {
476 		if (buf->m_head != NULL) {
477 			bus_dmamap_sync(buf->tag, buf->map,
478 			    BUS_DMASYNC_POSTWRITE);
479 			bus_dmamap_unload(buf->tag, buf->map);
480 			m_freem(buf->m_head);
481 			buf->m_head = NULL;
482 		}
483 		/* Clear the EOP index */
484 		buf->eop_index = -1;
485         }
486 
487 	/* Set number of descriptors available */
488 	txr->avail = que->num_desc;
489 
490 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
491 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
492 	IXL_TX_UNLOCK(txr);
493 }
494 
495 
496 /*********************************************************************
497  *
498  *  Free transmit ring related data structures.
499  *
500  **********************************************************************/
501 void
502 ixl_free_que_tx(struct ixl_queue *que)
503 {
504 	struct tx_ring *txr = &que->txr;
505 	struct ixl_tx_buf *buf;
506 
507 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
508 
509 	for (int i = 0; i < que->num_desc; i++) {
510 		buf = &txr->buffers[i];
511 		if (buf->m_head != NULL) {
512 			bus_dmamap_sync(buf->tag, buf->map,
513 			    BUS_DMASYNC_POSTWRITE);
514 			bus_dmamap_unload(buf->tag,
515 			    buf->map);
516 			m_freem(buf->m_head);
517 			buf->m_head = NULL;
518 			if (buf->map != NULL) {
519 				bus_dmamap_destroy(buf->tag,
520 				    buf->map);
521 				buf->map = NULL;
522 			}
523 		} else if (buf->map != NULL) {
524 			bus_dmamap_unload(buf->tag,
525 			    buf->map);
526 			bus_dmamap_destroy(buf->tag,
527 			    buf->map);
528 			buf->map = NULL;
529 		}
530 	}
531 	if (txr->br != NULL)
532 		buf_ring_free(txr->br, M_DEVBUF);
533 	if (txr->buffers != NULL) {
534 		free(txr->buffers, M_DEVBUF);
535 		txr->buffers = NULL;
536 	}
537 	if (txr->tx_tag != NULL) {
538 		bus_dma_tag_destroy(txr->tx_tag);
539 		txr->tx_tag = NULL;
540 	}
541 	if (txr->tso_tag != NULL) {
542 		bus_dma_tag_destroy(txr->tso_tag);
543 		txr->tso_tag = NULL;
544 	}
545 
546 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
547 	return;
548 }
549 
550 /*********************************************************************
551  *
552  *  Setup descriptor for hw offloads
553  *
554  **********************************************************************/
555 
556 static int
557 ixl_tx_setup_offload(struct ixl_queue *que,
558     struct mbuf *mp, u32 *cmd, u32 *off)
559 {
560 	struct ether_vlan_header	*eh;
561 #ifdef INET
562 	struct ip			*ip = NULL;
563 #endif
564 	struct tcphdr			*th = NULL;
565 #ifdef INET6
566 	struct ip6_hdr			*ip6;
567 #endif
568 	int				elen, ip_hlen = 0, tcp_hlen;
569 	u16				etype;
570 	u8				ipproto = 0;
571 	bool				tso = FALSE;
572 
573 
574 	/* Set up the TSO context descriptor if required */
575 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
576 		tso = ixl_tso_setup(que, mp);
577 		if (tso)
578 			++que->tso;
579 		else
580 			return (ENXIO);
581 	}
582 
583 	/*
584 	 * Determine where frame payload starts.
585 	 * Jump over vlan headers if already present,
586 	 * helpful for QinQ too.
587 	 */
588 	eh = mtod(mp, struct ether_vlan_header *);
589 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
590 		etype = ntohs(eh->evl_proto);
591 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
592 	} else {
593 		etype = ntohs(eh->evl_encap_proto);
594 		elen = ETHER_HDR_LEN;
595 	}
596 
597 	switch (etype) {
598 #ifdef INET
599 		case ETHERTYPE_IP:
600 			ip = (struct ip *)(mp->m_data + elen);
601 			ip_hlen = ip->ip_hl << 2;
602 			ipproto = ip->ip_p;
603 			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
604 			/* The IP checksum must be recalculated with TSO */
605 			if (tso)
606 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
607 			else
608 				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
609 			break;
610 #endif
611 #ifdef INET6
612 		case ETHERTYPE_IPV6:
613 			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
614 			ip_hlen = sizeof(struct ip6_hdr);
615 			ipproto = ip6->ip6_nxt;
616 			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
617 			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
618 			break;
619 #endif
620 		default:
621 			break;
622 	}
623 
624 	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
625 	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
626 
627 	switch (ipproto) {
628 		case IPPROTO_TCP:
629 			tcp_hlen = th->th_off << 2;
630 			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
631 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
632 				*off |= (tcp_hlen >> 2) <<
633 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
634 			}
635 #ifdef IXL_FDIR
636 			ixl_atr(que, th, etype);
637 #endif
638 			break;
639 		case IPPROTO_UDP:
640 			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
641 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
642 				*off |= (sizeof(struct udphdr) >> 2) <<
643 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
644 			}
645 			break;
646 
647 		case IPPROTO_SCTP:
648 			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
649 				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
650 				*off |= (sizeof(struct sctphdr) >> 2) <<
651 				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
652 			}
653 			/* Fall Thru */
654 		default:
655 			break;
656 	}
657 
658         return (0);
659 }
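
/*
** Offset encoding example (illustrative only): for an untagged TCP/IPv4
** frame with a 14-byte Ethernet header, a 20-byte IP header and a
** 20-byte TCP header, the 'off' word built above works out to
** MACLEN = 14 >> 1 = 7 (2-byte words), IPLEN = 20 >> 2 = 5 (4-byte
** words) and L4LEN = 20 >> 2 = 5 (4-byte words):
*/
#if 0	/* example only */
	u32 off = 0;
	off |= (14 >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	off |= (20 >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	off |= (20 >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
#endif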
660 
661 
662 /**********************************************************************
663  *
664  *  Setup context for hardware segmentation offload (TSO)
665  *
666  **********************************************************************/
667 static bool
668 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
669 {
670 	struct tx_ring			*txr = &que->txr;
671 	struct i40e_tx_context_desc	*TXD;
672 	struct ixl_tx_buf		*buf;
673 	u32				cmd, mss, type, tsolen;
674 	u16				etype;
675 	int				idx, elen, ip_hlen, tcp_hlen;
676 	struct ether_vlan_header	*eh;
677 #ifdef INET
678 	struct ip			*ip;
679 #endif
680 #ifdef INET6
681 	struct ip6_hdr			*ip6;
682 #endif
683 #if defined(INET6) || defined(INET)
684 	struct tcphdr			*th;
685 #endif
686 	u64				type_cmd_tso_mss;
687 
688 	/*
689 	 * Determine where frame payload starts.
690 	 * Jump over vlan headers if already present
691 	 */
692 	eh = mtod(mp, struct ether_vlan_header *);
693 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
694 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
695 		etype = eh->evl_proto;
696 	} else {
697 		elen = ETHER_HDR_LEN;
698 		etype = eh->evl_encap_proto;
699 	}
700 
701         switch (ntohs(etype)) {
702 #ifdef INET6
703 	case ETHERTYPE_IPV6:
704 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
705 		if (ip6->ip6_nxt != IPPROTO_TCP)
706 			return (FALSE);	/* not TCP, cannot do TSO */
707 		ip_hlen = sizeof(struct ip6_hdr);
708 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
709 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
710 		tcp_hlen = th->th_off << 2;
711 		break;
712 #endif
713 #ifdef INET
714 	case ETHERTYPE_IP:
715 		ip = (struct ip *)(mp->m_data + elen);
716 		if (ip->ip_p != IPPROTO_TCP)
717 			return (FALSE);	/* not TCP, cannot do TSO */
718 		ip->ip_sum = 0;
719 		ip_hlen = ip->ip_hl << 2;
720 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
721 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
722 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
723 		tcp_hlen = th->th_off << 2;
724 		break;
725 #endif
726 	default:
727 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
728 		    __func__, ntohs(etype));
729 		return FALSE;
730         }
731 
732         /* Ensure we have at least the IP+TCP header in the first mbuf. */
733         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
734 		return FALSE;
735 
736 	idx = txr->next_avail;
737 	buf = &txr->buffers[idx];
738 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
739 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
740 
741 	type = I40E_TX_DESC_DTYPE_CONTEXT;
742 	cmd = I40E_TX_CTX_DESC_TSO;
743 	mss = mp->m_pkthdr.tso_segsz;
744 
745 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
746 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
747 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
748 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
749 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
750 
751 	TXD->tunneling_params = htole32(0);
752 	buf->m_head = NULL;
753 	buf->eop_index = -1;
754 
755 	if (++idx == que->num_desc)
756 		idx = 0;
757 
758 	txr->avail--;
759 	txr->next_avail = idx;
760 
761 	return TRUE;
762 }
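
/*
** TSO context example (illustrative only): for a 4398-byte TSO request
** (14-byte Ethernet + 20-byte IP + 20-byte TCP headers plus 4344 bytes
** of payload) with tso_segsz = 1448, the context descriptor built above
** carries tsolen = 4398 - 54 = 4344 and mss = 1448, and the hardware
** cuts the payload into three 1448-byte segments, replicating and
** fixing up the headers for each.
*/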
763 
764 /*
765 ** ixl_get_tx_head - Retrieve the value from the
766 **    location the HW records its HEAD index
767 */
768 static inline u32
769 ixl_get_tx_head(struct ixl_queue *que)
770 {
771 	struct tx_ring  *txr = &que->txr;
772 	void *head = &txr->base[que->num_desc];
773 	return LE32_TO_CPU(*(volatile __le32 *)head);
774 }
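
/*
** Note (illustrative): head writeback assumes the TX descriptor area is
** sized with one extra slot; the hardware DMA-writes its current head
** (the index of the next descriptor it will process) into
** base[num_desc].  A rough sketch of "how many descriptors are done"
** relative to the software clean index:
*/
#if 0	/* example only */
	u32 head = ixl_get_tx_head(que);
	u32 ndone = (head >= txr->next_to_clean)
	    ? head - txr->next_to_clean
	    : que->num_desc - txr->next_to_clean + head;
#endif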
775 
776 /**********************************************************************
777  *
778  *  Examine each tx_buffer in the used queue. If the hardware is done
779  *  processing the packet then free associated resources. The
780  *  tx_buffer is put back on the free queue.
781  *
782  **********************************************************************/
783 bool
784 ixl_txeof(struct ixl_queue *que)
785 {
786 	struct ixl_vsi		*vsi = que->vsi;
787 	struct ifnet		*ifp = vsi->ifp;
788 	struct tx_ring		*txr = &que->txr;
789 	u32			first, last, head, done, processed;
790 	struct ixl_tx_buf	*buf;
791 	struct i40e_tx_desc	*tx_desc, *eop_desc;
792 
793 
794 	mtx_assert(&txr->mtx, MA_OWNED);
795 
796 	/* These are not the descriptors you seek, move along :) */
797 	if (txr->avail == que->num_desc) {
798 		que->busy = 0;
799 		return FALSE;
800 	}
801 
802 	processed = 0;
803 	first = txr->next_to_clean;
804 	buf = &txr->buffers[first];
805 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
806 	last = buf->eop_index;
807 	if (last == -1)
808 		return FALSE;
809 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
810 
811 	/* Get the Head WB value */
812 	head = ixl_get_tx_head(que);
813 
814 	/*
815 	** Get the index of the first descriptor
816 	** BEYOND the EOP and call that 'done'.
817 	** I do this so the comparison in the
818 	** inner while loop below can be simple
819 	*/
820 	if (++last == que->num_desc) last = 0;
821 	done = last;
822 
823         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
824             BUS_DMASYNC_POSTREAD);
825 	/*
826 	** The hardware writes the ring's HEAD index back to a
827 	** defined location; this, rather than a done bit, is
828 	** what is used to keep track of what must be 'cleaned'.
830 	*/
831 	while (first != head) {
832 		/* We clean the range of the packet */
833 		while (first != done) {
834 			++txr->avail;
835 			++processed;
836 
837 			if (buf->m_head) {
838 				txr->bytes += /* for ITR adjustment */
839 				    buf->m_head->m_pkthdr.len;
840 				txr->tx_bytes += /* for TX stats */
841 				    buf->m_head->m_pkthdr.len;
842 				bus_dmamap_sync(buf->tag,
843 				    buf->map,
844 				    BUS_DMASYNC_POSTWRITE);
845 				bus_dmamap_unload(buf->tag,
846 				    buf->map);
847 				m_freem(buf->m_head);
848 				buf->m_head = NULL;
849 				buf->map = NULL;
850 			}
851 			buf->eop_index = -1;
852 
853 			if (++first == que->num_desc)
854 				first = 0;
855 
856 			buf = &txr->buffers[first];
857 			tx_desc = &txr->base[first];
858 		}
859 		++txr->packets;
860 		++ifp->if_opackets;
861 		/* See if there is more work now */
862 		last = buf->eop_index;
863 		if (last != -1) {
864 			eop_desc = &txr->base[last];
865 			/* Get next done point */
866 			if (++last == que->num_desc) last = 0;
867 			done = last;
868 		} else
869 			break;
870 	}
871 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
872 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
873 
874 	txr->next_to_clean = first;
875 
876 
877 	/*
878 	** Hang detection: we know there is work outstanding,
879 	** or the early return above would have been taken, so
880 	** count this as an unsuccessful pass.  If the counter
881 	** grows too large, the watchdog in the local timer will
882 	** consider the queue hung.  If anything has been
883 	** cleaned, reset the state.
884 	*/
886 	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
887 		++que->busy;
888 
889 	if (processed)
890 		que->busy = 1; /* Note this turns off HUNG */
891 
892 	/*
893 	 * If there are no pending descriptors, clear the timeout.
894 	 */
895 	if (txr->avail == que->num_desc) {
896 		que->busy = 0;
897 		return FALSE;
898 	}
899 
900 	return TRUE;
901 }
902 
903 /*********************************************************************
904  *
905  *  Refresh mbuf buffers for RX descriptor rings
906  *   - now keeps its own state so discards due to resource
907  *     exhaustion are unnecessary, if an mbuf cannot be obtained
908  *     it just returns, keeping its placeholder, thus it can simply
909  *     be recalled to try again.
910  *
911  **********************************************************************/
912 static void
913 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
914 {
915 	struct ixl_vsi		*vsi = que->vsi;
916 	struct rx_ring		*rxr = &que->rxr;
917 	bus_dma_segment_t	hseg[1];
918 	bus_dma_segment_t	pseg[1];
919 	struct ixl_rx_buf	*buf;
920 	struct mbuf		*mh, *mp;
921 	int			i, j, nsegs, error;
922 	bool			refreshed = FALSE;
923 
924 	i = j = rxr->next_refresh;
925 	/* Control the loop with one beyond */
926 	if (++j == que->num_desc)
927 		j = 0;
928 
929 	while (j != limit) {
930 		buf = &rxr->buffers[i];
931 		if (rxr->hdr_split == FALSE)
932 			goto no_split;
933 
934 		if (buf->m_head == NULL) {
935 			mh = m_gethdr(M_NOWAIT, MT_DATA);
936 			if (mh == NULL)
937 				goto update;
938 		} else
939 			mh = buf->m_head;
940 
941 		mh->m_pkthdr.len = mh->m_len = MHLEN;
943 		mh->m_flags |= M_PKTHDR;
944 		/* Get the memory mapping */
945 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
946 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
947 		if (error != 0) {
948 			printf("Refresh mbufs: hdr dmamap load"
949 			    " failure - %d\n", error);
950 			m_free(mh);
951 			buf->m_head = NULL;
952 			goto update;
953 		}
954 		buf->m_head = mh;
955 		bus_dmamap_sync(rxr->htag, buf->hmap,
956 		    BUS_DMASYNC_PREREAD);
957 		rxr->base[i].read.hdr_addr =
958 		   htole64(hseg[0].ds_addr);
959 
960 no_split:
961 		if (buf->m_pack == NULL) {
962 			mp = m_getjcl(M_NOWAIT, MT_DATA,
963 			    M_PKTHDR, rxr->mbuf_sz);
964 			if (mp == NULL)
965 				goto update;
966 		} else
967 			mp = buf->m_pack;
968 
969 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
970 		/* Get the memory mapping */
971 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
972 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
973 		if (error != 0) {
974 			printf("Refresh mbufs: payload dmamap load"
975 			    " failure - %d\n", error);
976 			m_free(mp);
977 			buf->m_pack = NULL;
978 			goto update;
979 		}
980 		buf->m_pack = mp;
981 		bus_dmamap_sync(rxr->ptag, buf->pmap,
982 		    BUS_DMASYNC_PREREAD);
983 		rxr->base[i].read.pkt_addr =
984 		   htole64(pseg[0].ds_addr);
985 		/* Used only when doing header split */
986 		rxr->base[i].read.hdr_addr = 0;
987 
988 		refreshed = TRUE;
989 		/* Next is precalculated */
990 		i = j;
991 		rxr->next_refresh = i;
992 		if (++j == que->num_desc)
993 			j = 0;
994 	}
995 update:
996 	if (refreshed) /* Update hardware tail index */
997 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
998 	return;
999 }
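
/*
** Loop-control note (illustrative): 'j' runs one slot ahead of 'i' so
** the while test can simply compare against 'limit' (the caller's
** current clean position).  A rough sketch of how many descriptors are
** still waiting for fresh mbufs, which is what ixl_rx_unrefreshed()
** (implemented elsewhere in the driver) approximates:
*/
#if 0	/* example only */
	u16 pending = (rxr->next_check >= rxr->next_refresh)
	    ? rxr->next_check - rxr->next_refresh
	    : que->num_desc - rxr->next_refresh + rxr->next_check;
#endif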
1000 
1001 
1002 /*********************************************************************
1003  *
1004  *  Allocate memory for rx_buffer structures. Since we use one
1005  *  rx_buffer per descriptor, the maximum number of rx_buffers
1006  *  that we'll need is equal to the number of receive descriptors
1007  *  that we've defined.
1008  *
1009  **********************************************************************/
1010 int
1011 ixl_allocate_rx_data(struct ixl_queue *que)
1012 {
1013 	struct rx_ring		*rxr = &que->rxr;
1014 	struct ixl_vsi		*vsi = que->vsi;
1015 	device_t 		dev = vsi->dev;
1016 	struct ixl_rx_buf 	*buf;
1017 	int             	i, bsize, error;
1018 
1019 	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1020 	if (!(rxr->buffers =
1021 	    (struct ixl_rx_buf *) malloc(bsize,
1022 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1023 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1024 		error = ENOMEM;
1025 		return (error);
1026 	}
1027 
1028 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1029 				   1, 0,	/* alignment, bounds */
1030 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1031 				   BUS_SPACE_MAXADDR,	/* highaddr */
1032 				   NULL, NULL,		/* filter, filterarg */
1033 				   MSIZE,		/* maxsize */
1034 				   1,			/* nsegments */
1035 				   MSIZE,		/* maxsegsize */
1036 				   0,			/* flags */
1037 				   NULL,		/* lockfunc */
1038 				   NULL,		/* lockfuncarg */
1039 				   &rxr->htag))) {
1040 		device_printf(dev, "Unable to create RX DMA htag\n");
1041 		return (error);
1042 	}
1043 
1044 	if ((error = bus_dma_tag_create(NULL,	/* parent */
1045 				   1, 0,	/* alignment, bounds */
1046 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1047 				   BUS_SPACE_MAXADDR,	/* highaddr */
1048 				   NULL, NULL,		/* filter, filterarg */
1049 				   MJUM16BYTES,		/* maxsize */
1050 				   1,			/* nsegments */
1051 				   MJUM16BYTES,		/* maxsegsize */
1052 				   0,			/* flags */
1053 				   NULL,		/* lockfunc */
1054 				   NULL,		/* lockfuncarg */
1055 				   &rxr->ptag))) {
1056 		device_printf(dev, "Unable to create RX DMA ptag\n");
1057 		return (error);
1058 	}
1059 
1060 	for (i = 0; i < que->num_desc; i++) {
1061 		buf = &rxr->buffers[i];
1062 		error = bus_dmamap_create(rxr->htag,
1063 		    BUS_DMA_NOWAIT, &buf->hmap);
1064 		if (error) {
1065 			device_printf(dev, "Unable to create RX head map\n");
1066 			break;
1067 		}
1068 		error = bus_dmamap_create(rxr->ptag,
1069 		    BUS_DMA_NOWAIT, &buf->pmap);
1070 		if (error) {
1071 			device_printf(dev, "Unable to create RX pkt map\n");
1072 			break;
1073 		}
1074 	}
1075 
1076 	return (error);
1077 }
1078 
1079 
1080 /*********************************************************************
1081  *
1082  *  (Re)Initialize the queue receive ring and its buffers.
1083  *
1084  **********************************************************************/
1085 int
1086 ixl_init_rx_ring(struct ixl_queue *que)
1087 {
1088 	struct ixl_vsi		*vsi = que->vsi;
1089 	struct ifnet		*ifp = vsi->ifp;
1090 	struct	rx_ring 	*rxr = &que->rxr;
1091 	struct lro_ctrl		*lro = &rxr->lro;
1092 	struct ixl_rx_buf	*buf;
1093 	bus_dma_segment_t	pseg[1], hseg[1];
1094 	int			rsize, nsegs, error = 0;
1095 
1096 	IXL_RX_LOCK(rxr);
1097 	/* Clear the ring contents */
1098 	rsize = roundup2(que->num_desc *
1099 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1100 	bzero((void *)rxr->base, rsize);
1101 	/* Cleanup any existing buffers */
1102 	for (int i = 0; i < que->num_desc; i++) {
1103 		buf = &rxr->buffers[i];
1104 		if (buf->m_head != NULL) {
1105 			bus_dmamap_sync(rxr->htag, buf->hmap,
1106 			    BUS_DMASYNC_POSTREAD);
1107 			bus_dmamap_unload(rxr->htag, buf->hmap);
1108 			buf->m_head->m_flags |= M_PKTHDR;
1109 			m_freem(buf->m_head);
1110 		}
1111 		if (buf->m_pack != NULL) {
1112 			bus_dmamap_sync(rxr->ptag, buf->pmap,
1113 			    BUS_DMASYNC_POSTREAD);
1114 			bus_dmamap_unload(rxr->ptag, buf->pmap);
1115 			buf->m_pack->m_flags |= M_PKTHDR;
1116 			m_freem(buf->m_pack);
1117 		}
1118 		buf->m_head = NULL;
1119 		buf->m_pack = NULL;
1120 	}
1121 
1122 	/* header split is off */
1123 	rxr->hdr_split = FALSE;
1124 
1125 	/* Now replenish the mbufs */
1126 	for (int j = 0; j != que->num_desc; ++j) {
1127 		struct mbuf	*mh, *mp;
1128 
1129 		buf = &rxr->buffers[j];
1130 		/*
1131 		** Don't allocate a header mbuf when not
1132 		** doing header split; it's wasteful.
1133 		*/
1134 		if (rxr->hdr_split == FALSE)
1135 			goto skip_head;
1136 
1137 		/* First the header */
1138 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1139 		if (buf->m_head == NULL) {
1140 			error = ENOBUFS;
1141 			goto fail;
1142 		}
1143 		m_adj(buf->m_head, ETHER_ALIGN);
1144 		mh = buf->m_head;
1145 		mh->m_len = mh->m_pkthdr.len = MHLEN;
1146 		mh->m_flags |= M_PKTHDR;
1147 		/* Get the memory mapping */
1148 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1149 		    buf->hmap, buf->m_head, hseg,
1150 		    &nsegs, BUS_DMA_NOWAIT);
1151 		if (error != 0) /* Nothing elegant to do here */
1152 			goto fail;
1153 		bus_dmamap_sync(rxr->htag,
1154 		    buf->hmap, BUS_DMASYNC_PREREAD);
1155 		/* Update descriptor */
1156 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1157 
1158 skip_head:
1159 		/* Now the payload cluster */
1160 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1161 		    M_PKTHDR, rxr->mbuf_sz);
1162 		if (buf->m_pack == NULL) {
1163 			error = ENOBUFS;
1164                         goto fail;
1165 		}
1166 		mp = buf->m_pack;
1167 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1168 		/* Get the memory mapping */
1169 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1170 		    buf->pmap, mp, pseg,
1171 		    &nsegs, BUS_DMA_NOWAIT);
1172 		if (error != 0)
1173                         goto fail;
1174 		bus_dmamap_sync(rxr->ptag,
1175 		    buf->pmap, BUS_DMASYNC_PREREAD);
1176 		/* Update descriptor */
1177 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1178 		rxr->base[j].read.hdr_addr = 0;
1179 	}
1180 
1181 
1182 	/* Setup our descriptor indices */
1183 	rxr->next_check = 0;
1184 	rxr->next_refresh = 0;
1185 	rxr->lro_enabled = FALSE;
1186 	rxr->split = 0;
1187 	rxr->bytes = 0;
1188 	rxr->discard = FALSE;
1189 
1190 	/*
1191 	** Now set up the LRO interface:
1192 	*/
1193 	if (ifp->if_capenable & IFCAP_LRO) {
1194 		int err = tcp_lro_init(lro);
1195 		if (err) {
1196 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1197 			goto fail;
1198 		}
1199 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1200 		rxr->lro_enabled = TRUE;
1201 		lro->ifp = vsi->ifp;
1202 	}
1203 
1204 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1205 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1206 
1207 fail:
1208 	IXL_RX_UNLOCK(rxr);
1209 	return (error);
1210 }
1211 
1212 
1213 /*********************************************************************
1214  *
1215  *  Free station receive ring data structures
1216  *
1217  **********************************************************************/
1218 void
1219 ixl_free_que_rx(struct ixl_queue *que)
1220 {
1221 	struct rx_ring		*rxr = &que->rxr;
1222 	struct ixl_rx_buf	*buf;
1223 
1224 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1225 
1226 	/* Cleanup any existing buffers */
1227 	if (rxr->buffers != NULL) {
1228 		for (int i = 0; i < que->num_desc; i++) {
1229 			buf = &rxr->buffers[i];
1230 			if (buf->m_head != NULL) {
1231 				bus_dmamap_sync(rxr->htag, buf->hmap,
1232 				    BUS_DMASYNC_POSTREAD);
1233 				bus_dmamap_unload(rxr->htag, buf->hmap);
1234 				buf->m_head->m_flags |= M_PKTHDR;
1235 				m_freem(buf->m_head);
1236 			}
1237 			if (buf->m_pack != NULL) {
1238 				bus_dmamap_sync(rxr->ptag, buf->pmap,
1239 				    BUS_DMASYNC_POSTREAD);
1240 				bus_dmamap_unload(rxr->ptag, buf->pmap);
1241 				buf->m_pack->m_flags |= M_PKTHDR;
1242 				m_freem(buf->m_pack);
1243 			}
1244 			buf->m_head = NULL;
1245 			buf->m_pack = NULL;
1246 			if (buf->hmap != NULL) {
1247 				bus_dmamap_destroy(rxr->htag, buf->hmap);
1248 				buf->hmap = NULL;
1249 			}
1250 			if (buf->pmap != NULL) {
1251 				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1252 				buf->pmap = NULL;
1253 			}
1254 		}
1255 		if (rxr->buffers != NULL) {
1256 			free(rxr->buffers, M_DEVBUF);
1257 			rxr->buffers = NULL;
1258 		}
1259 	}
1260 
1261 	if (rxr->htag != NULL) {
1262 		bus_dma_tag_destroy(rxr->htag);
1263 		rxr->htag = NULL;
1264 	}
1265 	if (rxr->ptag != NULL) {
1266 		bus_dma_tag_destroy(rxr->ptag);
1267 		rxr->ptag = NULL;
1268 	}
1269 
1270 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1271 	return;
1272 }
1273 
1274 static __inline void
1275 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1276 {
1277         /*
1278          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
1279          * has been validated by hardware.  The packet also must not carry a
1280          * VLAN tag in its Ethernet header.
1281          */
1282         if (rxr->lro_enabled &&
1283             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1284             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1285             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1286                 /*
1287                  *  - LRO not enabled, or
1288                  *  - no LRO resources, or
1289                  *  - lro enqueue fails
1290                  **  - lro enqueue fails
1291                  */
1292                 if (rxr->lro.lro_cnt != 0)
1293                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1294                                 return;
1295         }
1296 	IXL_RX_UNLOCK(rxr);
1297         (*ifp->if_input)(ifp, m);
1298 	IXL_RX_LOCK(rxr);
1299 }
1300 
1301 
1302 static __inline void
1303 ixl_rx_discard(struct rx_ring *rxr, int i)
1304 {
1305 	struct ixl_rx_buf	*rbuf;
1306 
1307 	rbuf = &rxr->buffers[i];
1308 
1309         if (rbuf->fmp != NULL) {/* Partial chain ? */
1310 		rbuf->fmp->m_flags |= M_PKTHDR;
1311                 m_freem(rbuf->fmp);
1312                 rbuf->fmp = NULL;
1313 	}
1314 
1315 	/*
1316 	** With advanced descriptors the writeback
1317 	** clobbers the buffer addrs, so it's easier
1318 	** to just free the existing mbufs and take
1319 	** the normal refresh path to get new buffers
1320 	** and mapping.
1321 	*/
1322 	if (rbuf->m_head) {
1323 		m_free(rbuf->m_head);
1324 		rbuf->m_head = NULL;
1325 	}
1326 
1327 	if (rbuf->m_pack) {
1328 		m_free(rbuf->m_pack);
1329 		rbuf->m_pack = NULL;
1330 	}
1331 
1332 	return;
1333 }
1334 
1335 
1336 /*********************************************************************
1337  *
1338  *  This routine executes in interrupt context. It replenishes
1339  *  the mbufs in the descriptor and sends data which has been
1340  *  dma'ed into host memory to upper layer.
1341  *
1342  *  We loop at most count times if count is > 0, or until done if
1343  *  count < 0.
1344  *
1345  *  Return TRUE for more work, FALSE for all clean.
1346  *********************************************************************/
1347 bool
1348 ixl_rxeof(struct ixl_queue *que, int count)
1349 {
1350 	struct ixl_vsi		*vsi = que->vsi;
1351 	struct rx_ring		*rxr = &que->rxr;
1352 	struct ifnet		*ifp = vsi->ifp;
1353 	struct lro_ctrl		*lro = &rxr->lro;
1354 	struct lro_entry	*queued;
1355 	int			i, nextp, processed = 0;
1356 	union i40e_rx_desc	*cur;
1357 	struct ixl_rx_buf	*rbuf, *nbuf;
1358 
1359 
1360 	IXL_RX_LOCK(rxr);
1361 
1362 	for (i = rxr->next_check; count != 0;) {
1363 		struct mbuf	*sendmp, *mh, *mp;
1364 		u32		rsc, status, error;
1365 		u16		hlen, plen, vtag;
1366 		u64		qword;
1367 		u8		ptype;
1368 		bool		eop;
1369 
1370 		/* Sync the ring. */
1371 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1372 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1373 
1374 		cur = &rxr->base[i];
1375 		qword = le64toh(cur->wb.qword1.status_error_len);
1376 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1377 		    >> I40E_RXD_QW1_STATUS_SHIFT;
1378 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1379 		    >> I40E_RXD_QW1_ERROR_SHIFT;
1380 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1381 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1382 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1383 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1384 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1385 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1386 
1387 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1388 			++rxr->not_done;
1389 			break;
1390 		}
1391 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1392 			break;
1393 
1394 		count--;
1395 		sendmp = NULL;
1396 		nbuf = NULL;
1397 		rsc = 0;
1398 		cur->wb.qword1.status_error_len = 0;
1399 		rbuf = &rxr->buffers[i];
1400 		mh = rbuf->m_head;
1401 		mp = rbuf->m_pack;
1402 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1403 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1404 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1405 		else
1406 			vtag = 0;
1407 
1408 		/*
1409 		** Make sure bad packets are discarded;
1410 		** note that only the EOP descriptor has valid
1411 		** error results.
1412 		*/
1413                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1414 			ifp->if_ierrors++;
1415 			rxr->discarded++;
1416 			ixl_rx_discard(rxr, i);
1417 			goto next_desc;
1418 		}
1419 
1420 		/* Prefetch the next buffer */
1421 		if (!eop) {
1422 			nextp = i + 1;
1423 			if (nextp == que->num_desc)
1424 				nextp = 0;
1425 			nbuf = &rxr->buffers[nextp];
1426 			prefetch(nbuf);
1427 		}
1428 
1429 		/*
1430 		** The header mbuf is ONLY used when header
1431 		** split is enabled, otherwise we get normal
1432 		** behavior, ie, both header and payload
1433 		** are DMA'd into the payload buffer.
1434 		**
1435 		** Rather than using the fmp/lmp global pointers
1436 		** we now keep the head of a packet chain in the
1437 		** buffer struct and pass this along from one
1438 		** descriptor to the next, until we get EOP.
1439 		*/
1440 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1441 			if (hlen > IXL_RX_HDR)
1442 				hlen = IXL_RX_HDR;
1443 			mh->m_len = hlen;
1444 			mh->m_flags |= M_PKTHDR;
1445 			mh->m_next = NULL;
1446 			mh->m_pkthdr.len = mh->m_len;
1447 			/* Null buf pointer so it is refreshed */
1448 			rbuf->m_head = NULL;
1449 			/*
1450 			** Check the payload length; this
1451 			** could be zero if it's a small
1452 			** packet.
1453 			*/
1454 			if (plen > 0) {
1455 				mp->m_len = plen;
1456 				mp->m_next = NULL;
1457 				mp->m_flags &= ~M_PKTHDR;
1458 				mh->m_next = mp;
1459 				mh->m_pkthdr.len += mp->m_len;
1460 				/* Null buf pointer so it is refreshed */
1461 				rbuf->m_pack = NULL;
1462 				rxr->split++;
1463 			}
1464 			/*
1465 			** Now create the forward
1466 			** chain so when complete
1467 			** we won't have to.
1468 			*/
1469                         if (eop == 0) {
1470 				/* stash the chain head */
1471                                 nbuf->fmp = mh;
1472 				/* Make forward chain */
1473                                 if (plen)
1474                                         mp->m_next = nbuf->m_pack;
1475                                 else
1476                                         mh->m_next = nbuf->m_pack;
1477                         } else {
1478 				/* Singlet, prepare to send */
1479                                 sendmp = mh;
1480                                 if (vtag) {
1481                                         sendmp->m_pkthdr.ether_vtag = vtag;
1482                                         sendmp->m_flags |= M_VLANTAG;
1483                                 }
1484                         }
1485 		} else {
1486 			/*
1487 			** Either no header split, or a
1488 			** secondary piece of a fragmented
1489 			** split packet.
1490 			*/
1491 			mp->m_len = plen;
1492 			/*
1493 			** See if there is a stored head
1494 			** that determines what we are
1495 			*/
1496 			sendmp = rbuf->fmp;
1497 			rbuf->m_pack = rbuf->fmp = NULL;
1498 
1499 			if (sendmp != NULL) /* secondary frag */
1500 				sendmp->m_pkthdr.len += mp->m_len;
1501 			else {
1502 				/* first desc of a non-ps chain */
1503 				sendmp = mp;
1504 				sendmp->m_flags |= M_PKTHDR;
1505 				sendmp->m_pkthdr.len = mp->m_len;
1506 				if (vtag) {
1507 					sendmp->m_pkthdr.ether_vtag = vtag;
1508 					sendmp->m_flags |= M_VLANTAG;
1509 				}
1510                         }
1511 			/* Pass the head pointer on */
1512 			if (eop == 0) {
1513 				nbuf->fmp = sendmp;
1514 				sendmp = NULL;
1515 				mp->m_next = nbuf->m_pack;
1516 			}
1517 		}
1518 		++processed;
1519 		/* Sending this frame? */
1520 		if (eop) {
1521 			sendmp->m_pkthdr.rcvif = ifp;
1522 			/* gather stats */
1523 			ifp->if_ipackets++;
1524 			rxr->rx_packets++;
1525 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1526 			/* capture data for dynamic ITR adjustment */
1527 			rxr->packets++;
1528 			rxr->bytes += sendmp->m_pkthdr.len;
1529 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1530 				ixl_rx_checksum(sendmp, status, error, ptype);
1531 			sendmp->m_pkthdr.flowid = que->msix;
1532 			sendmp->m_flags |= M_FLOWID;
1533 		}
1534 next_desc:
1535 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1536 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1537 
1538 		/* Advance our pointers to the next descriptor. */
1539 		if (++i == que->num_desc)
1540 			i = 0;
1541 
1542 		/* Now send to the stack or do LRO */
1543 		if (sendmp != NULL) {
1544 			rxr->next_check = i;
1545 			ixl_rx_input(rxr, ifp, sendmp, ptype);
1546 			i = rxr->next_check;
1547 		}
1548 
1549                /* Every 8 descriptors we go to refresh mbufs */
1550 		if (processed == 8) {
1551 			ixl_refresh_mbufs(que, i);
1552 			processed = 0;
1553 		}
1554 	}
1555 
1556 	/* Refresh any remaining buf structs */
1557 	if (ixl_rx_unrefreshed(que))
1558 		ixl_refresh_mbufs(que, i);
1559 
1560 	rxr->next_check = i;
1561 
1562 	/*
1563 	 * Flush any outstanding LRO work
1564 	 */
1565 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1566 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1567 		tcp_lro_flush(lro, queued);
1568 	}
1569 
1570 	IXL_RX_UNLOCK(rxr);
1571 	return (FALSE);
1572 }
1573 
1574 
1575 /*********************************************************************
1576  *
1577  *  Verify that the hardware indicated that the checksum is valid.
1578  *  Inform the stack about the status of checksum so that stack
1579  *  doesn't spend time verifying the checksum.
1580  *
1581  *********************************************************************/
1582 static void
1583 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1584 {
1585 	struct i40e_rx_ptype_decoded decoded;
1586 
1587 	decoded = decode_rx_desc_ptype(ptype);
1588 
1589 	/* Errors? */
1590  	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1591 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1592 		mp->m_pkthdr.csum_flags = 0;
1593 		return;
1594 	}
1595 
1596 	/* IPv6 packets with extension headers likely have a bad csum */
1597 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1598 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1599 		if (status &
1600 		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1601 			mp->m_pkthdr.csum_flags = 0;
1602 			return;
1603 		}
1604 
1605 
1606 	/* IP Checksum Good */
1607 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1608 	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1609 
1610 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1611 		mp->m_pkthdr.csum_flags |=
1612 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1613 		mp->m_pkthdr.csum_data |= htons(0xffff);
1614 	}
1615 	return;
1616 }
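
/*
** Example outcome (illustrative only): for a good TCP/IPv4 packet the
** descriptor sets the L3L4P status bit and neither the IPE nor the L4E
** error bit, so the routine above hands the stack an mbuf with
** csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
** CSUM_PSEUDO_HDR and csum_data = 0xffff, letting the TCP input path
** skip software checksum verification.
*/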
1617