xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision 99429157e8615dc3b7f11afbe3ed92de7476a5db)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
/*
** HW RSC control:
**  This feature only works with IPv4, and only
**  on 82599 and later hardware. It also causes
**  IP forwarding to fail, and unlike LRO it
**  cannot be controlled by the stack. For these
**  reasons it is left off and no tunable
**  interface is provided; enabling it requires
**  changing this value and recompiling.
*/
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
/*
** For Flow Director: this is the TX packet
** sampling interval for the filter pool; with
** the default of 20, every 20th packet will
** be probed.
**
** This feature can be disabled by setting
** this to 0.
*/
81 static int atr_sample_rate = 20;
82 #endif
83 
84 /*********************************************************************
85  *  Local Function prototypes
86  *********************************************************************/
87 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int	ixgbe_setup_receive_ring(struct rx_ring *);
90 static void     ixgbe_free_receive_buffers(struct rx_ring *);
91 
92 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
96 		    struct mbuf *, u32 *, u32 *);
97 static int	ixgbe_tso_setup(struct tx_ring *,
98 		    struct mbuf *, u32 *, u32 *);
99 #ifdef IXGBE_FDIR
100 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
101 #endif
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
104 		    struct mbuf *, u32);
105 
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108  *  Transmit entry point
109  *
110  *  ixgbe_start is called by the stack to initiate a transmit.
111  *  The driver will remain in this routine as long as there are
112  *  packets to transmit and transmit resources are available.
113  *  In case resources are not available stack is notified and
114  *  the packet is requeued.
115  **********************************************************************/
116 
117 void
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
119 {
120 	struct mbuf    *m_head;
121 	struct adapter *adapter = txr->adapter;
122 
123 	IXGBE_TX_LOCK_ASSERT(txr);
124 
125 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
126 		return;
127 	if (!adapter->link_active)
128 		return;
129 
130 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
132 			break;
133 
134 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
135 		if (m_head == NULL)
136 			break;
137 
138 		if (ixgbe_xmit(txr, &m_head)) {
139 			if (m_head != NULL)
140 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
141 			break;
142 		}
143 		/* Send a copy of the frame to the BPF listener */
144 		ETHER_BPF_MTAP(ifp, m_head);
145 	}
146 	return;
147 }
148 
149 /*
150  * Legacy TX start - called by the stack, this
151  * always uses the first tx ring, and should
152  * not be used with multiqueue tx enabled.
153  */
154 void
155 ixgbe_start(struct ifnet *ifp)
156 {
157 	struct adapter *adapter = ifp->if_softc;
158 	struct tx_ring	*txr = adapter->tx_rings;
159 
160 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
161 		IXGBE_TX_LOCK(txr);
162 		ixgbe_start_locked(txr, ifp);
163 		IXGBE_TX_UNLOCK(txr);
164 	}
165 	return;
166 }
167 
168 #else /* ! IXGBE_LEGACY_TX */
169 
170 /*
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
173 */
174 int
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
176 {
177 	struct adapter	*adapter = ifp->if_softc;
178 	struct ix_queue	*que;
179 	struct tx_ring	*txr;
180 	int 		i, err = 0;
181 #ifdef	RSS
182 	uint32_t bucket_id;
183 #endif
184 
185 	/*
186 	 * When doing RSS, map it to the same outbound queue
187 	 * as the incoming flow would be mapped to.
188 	 *
189 	 * If everything is setup correctly, it should be the
190 	 * same bucket that the current CPU we're on is.
191 	 */
192 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
193 #ifdef	RSS
194 		if (rss_hash2bucket(m->m_pkthdr.flowid,
195 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
196 			i = bucket_id % adapter->num_queues;
197 #ifdef IXGBE_DEBUG
198 			if (bucket_id > adapter->num_queues)
199 				if_printf(ifp, "bucket_id (%d) > num_queues "
200 				    "(%d)\n", bucket_id, adapter->num_queues);
201 #endif
202 		} else
203 #endif
204 			i = m->m_pkthdr.flowid % adapter->num_queues;
205 	} else
206 		i = curcpu % adapter->num_queues;
207 
208 	/* Check for a hung queue and pick alternative */
209 	if (((1 << i) & adapter->active_queues) == 0)
210 		i = ffsl(adapter->active_queues);
211 
212 	txr = &adapter->tx_rings[i];
213 	que = &adapter->queues[i];
214 
215 	err = drbr_enqueue(ifp, txr->br, m);
216 	if (err)
217 		return (err);
218 	if (IXGBE_TX_TRYLOCK(txr)) {
219 		ixgbe_mq_start_locked(ifp, txr);
220 		IXGBE_TX_UNLOCK(txr);
221 	} else
222 		taskqueue_enqueue(que->tq, &txr->txq_task);
223 
224 	return (0);
225 }
226 
227 int
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
229 {
230 	struct adapter  *adapter = txr->adapter;
231         struct mbuf     *next;
232         int             enqueued = 0, err = 0;
233 
234 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235 	    adapter->link_active == 0)
236 		return (ENETDOWN);
237 
238 	/* Process the queue */
239 #if __FreeBSD_version < 901504
240 	next = drbr_dequeue(ifp, txr->br);
241 	while (next != NULL) {
242 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
243 			if (next != NULL)
244 				err = drbr_enqueue(ifp, txr->br, next);
245 #else
246 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
248 			if (next == NULL) {
249 				drbr_advance(ifp, txr->br);
250 			} else {
251 				drbr_putback(ifp, txr->br, next);
252 			}
253 #endif
254 			break;
255 		}
256 #if __FreeBSD_version >= 901504
257 		drbr_advance(ifp, txr->br);
258 #endif
259 		enqueued++;
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
262 		/*
263 		 * Since we're looking at the tx ring, we can check
264 		 * to see if we're a VF by examing our tail register
265 		 * address.
266 		 */
267 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
269 #endif
270 #endif
271 		/* Send a copy of the frame to the BPF listener */
272 		ETHER_BPF_MTAP(ifp, next);
273 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
274 			break;
275 #if __FreeBSD_version < 901504
276 		next = drbr_dequeue(ifp, txr->br);
277 #endif
278 	}
279 
280 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
281 		ixgbe_txeof(txr);
282 
283 	return (err);
284 }
285 
286 /*
287  * Called from a taskqueue to drain queued transmit packets.
288  */
289 void
290 ixgbe_deferred_mq_start(void *arg, int pending)
291 {
292 	struct tx_ring *txr = arg;
293 	struct adapter *adapter = txr->adapter;
294 	struct ifnet *ifp = adapter->ifp;
295 
296 	IXGBE_TX_LOCK(txr);
297 	if (!drbr_empty(ifp, txr->br))
298 		ixgbe_mq_start_locked(ifp, txr);
299 	IXGBE_TX_UNLOCK(txr);
300 }
301 
302 /*
303  * Flush all ring buffers
304  */
305 void
306 ixgbe_qflush(struct ifnet *ifp)
307 {
308 	struct adapter	*adapter = ifp->if_softc;
309 	struct tx_ring	*txr = adapter->tx_rings;
310 	struct mbuf	*m;
311 
312 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
313 		IXGBE_TX_LOCK(txr);
314 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
315 			m_freem(m);
316 		IXGBE_TX_UNLOCK(txr);
317 	}
318 	if_qflush(ifp);
319 }
320 #endif /* IXGBE_LEGACY_TX */
321 
322 
323 /*********************************************************************
324  *
325  *  This routine maps the mbufs to tx descriptors, allowing the
326  *  TX engine to transmit the packets.
327  *  	- return 0 on success, positive on failure
328  *
329  **********************************************************************/
330 
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
333 {
334 	struct adapter  *adapter = txr->adapter;
335 	u32		olinfo_status = 0, cmd_type_len;
336 	int             i, j, error, nsegs;
337 	int		first;
338 	bool		remap = TRUE;
339 	struct mbuf	*m_head;
340 	bus_dma_segment_t segs[adapter->num_segs];
341 	bus_dmamap_t	map;
342 	struct ixgbe_tx_buf *txbuf;
343 	union ixgbe_adv_tx_desc *txd = NULL;
344 
345 	m_head = *m_headp;
346 
347 	/* Basic descriptor defines */
348         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
350 
351 	if (m_head->m_flags & M_VLANTAG)
352         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
353 
354         /*
355          * Important to capture the first descriptor
356          * used because it will contain the index of
357          * the one we tell the hardware to report back
358          */
359         first = txr->next_avail_desc;
360 	txbuf = &txr->tx_buffers[first];
361 	map = txbuf->map;
362 
363 	/*
364 	 * Map the packet for DMA.
365 	 */
366 retry:
367 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
369 
370 	if (__predict_false(error)) {
371 		struct mbuf *m;
372 
373 		switch (error) {
374 		case EFBIG:
375 			/* Try it again? - one try */
376 			if (remap == TRUE) {
377 				remap = FALSE;
378 				/*
379 				 * XXX: m_defrag will choke on
380 				 * non-MCLBYTES-sized clusters
381 				 */
382 				m = m_defrag(*m_headp, M_NOWAIT);
383 				if (m == NULL) {
384 					adapter->mbuf_defrag_failed++;
385 					m_freem(*m_headp);
386 					*m_headp = NULL;
387 					return (ENOBUFS);
388 				}
389 				*m_headp = m;
390 				goto retry;
391 			} else
392 				return (error);
393 		case ENOMEM:
394 			txr->no_tx_dma_setup++;
395 			return (error);
396 		default:
397 			txr->no_tx_dma_setup++;
398 			m_freem(*m_headp);
399 			*m_headp = NULL;
400 			return (error);
401 		}
402 	}
403 
404 	/* Make certain there are enough descriptors */
405 	if (txr->tx_avail < (nsegs + 2)) {
406 		txr->no_desc_avail++;
407 		bus_dmamap_unload(txr->txtag, map);
408 		return (ENOBUFS);
409 	}
410 	m_head = *m_headp;
411 
412 	/*
413 	 * Set up the appropriate offload context
414 	 * this will consume the first descriptor
415 	 */
416 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417 	if (__predict_false(error)) {
418 		if (error == ENOBUFS)
419 			*m_headp = NULL;
420 		return (error);
421 	}
422 
423 #ifdef IXGBE_FDIR
424 	/* Do the flow director magic */
425 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
426 		++txr->atr_count;
427 		if (txr->atr_count >= atr_sample_rate) {
428 			ixgbe_atr(txr, m_head);
429 			txr->atr_count = 0;
430 		}
431 	}
432 #endif
433 
434 	olinfo_status |= IXGBE_ADVTXD_CC;
435 	i = txr->next_avail_desc;
436 	for (j = 0; j < nsegs; j++) {
437 		bus_size_t seglen;
438 		bus_addr_t segaddr;
439 
440 		txbuf = &txr->tx_buffers[i];
441 		txd = &txr->tx_base[i];
442 		seglen = segs[j].ds_len;
443 		segaddr = htole64(segs[j].ds_addr);
444 
445 		txd->read.buffer_addr = segaddr;
446 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 		    cmd_type_len |seglen);
448 		txd->read.olinfo_status = htole32(olinfo_status);
449 
450 		if (++i == txr->num_desc)
451 			i = 0;
452 	}
453 
454 	txd->read.cmd_type_len |=
455 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 	txr->tx_avail -= nsegs;
457 	txr->next_avail_desc = i;
458 
459 	txbuf->m_head = m_head;
460 	/*
461 	 * Here we swap the map so the last descriptor,
462 	 * which gets the completion interrupt has the
463 	 * real map, and the first descriptor gets the
464 	 * unused map from this descriptor.
465 	 */
466 	txr->tx_buffers[first].map = txbuf->map;
467 	txbuf->map = map;
468 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469 
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472 	txbuf->eop = txd;
473 
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476 	/*
477 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
478 	 * hardware that this frame is available to transmit.
479 	 */
480 	++txr->total_packets;
481 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482 
483 	/* Mark queue as having work */
484 	if (txr->busy == 0)
485 		txr->busy = 1;
486 
487 	return (0);
488 }
489 
490 
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach, setup is done every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501 	struct adapter *adapter = txr->adapter;
502 	device_t dev = adapter->dev;
503 	struct ixgbe_tx_buf *txbuf;
504 	int error, i;
505 
506 	/*
507 	 * Setup DMA descriptor areas.
508 	 */
509 	if ((error = bus_dma_tag_create(
510 			       bus_get_dma_tag(adapter->dev),	/* parent */
511 			       1, 0,		/* alignment, bounds */
512 			       BUS_SPACE_MAXADDR,	/* lowaddr */
513 			       BUS_SPACE_MAXADDR,	/* highaddr */
514 			       NULL, NULL,		/* filter, filterarg */
515 			       IXGBE_TSO_SIZE,		/* maxsize */
516 			       adapter->num_segs,	/* nsegments */
517 			       PAGE_SIZE,		/* maxsegsize */
518 			       0,			/* flags */
519 			       NULL,			/* lockfunc */
520 			       NULL,			/* lockfuncarg */
521 			       &txr->txtag))) {
522 		device_printf(dev,"Unable to allocate TX DMA tag\n");
523 		goto fail;
524 	}
525 
526 	if (!(txr->tx_buffers =
527 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
530 		error = ENOMEM;
531 		goto fail;
532 	}
533 
534         /* Create the descriptor buffer dma maps */
535 	txbuf = txr->tx_buffers;
536 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538 		if (error != 0) {
539 			device_printf(dev, "Unable to create TX DMA map\n");
540 			goto fail;
541 		}
542 	}
543 
544 	return 0;
545 fail:
546 	/* We free all, it handles case where we are in the middle */
547 	ixgbe_free_transmit_structures(adapter);
548 	return (error);
549 }
550 
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559 	struct adapter *adapter = txr->adapter;
560 	struct ixgbe_tx_buf *txbuf;
561 #ifdef DEV_NETMAP
562 	struct netmap_adapter *na = NA(adapter->ifp);
563 	struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
565 
566 	/* Clear the old ring contents */
567 	IXGBE_TX_LOCK(txr);
568 #ifdef DEV_NETMAP
569 	/*
570 	 * (under lock): if in netmap mode, do some consistency
571 	 * checks and set slot to entry 0 of the netmap ring.
572 	 */
573 	slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 	bzero((void *)txr->tx_base,
576 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
577 	/* Reset indices */
578 	txr->next_avail_desc = 0;
579 	txr->next_to_clean = 0;
580 
581 	/* Free any existing tx buffers. */
582         txbuf = txr->tx_buffers;
583 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 		if (txbuf->m_head != NULL) {
585 			bus_dmamap_sync(txr->txtag, txbuf->map,
586 			    BUS_DMASYNC_POSTWRITE);
587 			bus_dmamap_unload(txr->txtag, txbuf->map);
588 			m_freem(txbuf->m_head);
589 			txbuf->m_head = NULL;
590 		}
591 #ifdef DEV_NETMAP
592 		/*
593 		 * In netmap mode, set the map for the packet buffer.
594 		 * NOTE: Some drivers (not this one) also need to set
595 		 * the physical buffer address in the NIC ring.
596 		 * Slots in the netmap ring (indexed by "si") are
597 		 * kring->nkr_hwofs positions "ahead" wrt the
598 		 * corresponding slot in the NIC ring. In some drivers
599 		 * (not here) nkr_hwofs can be negative. Function
600 		 * netmap_idx_n2k() handles wraparounds properly.
601 		 */
602 		if (slot) {
603 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 			netmap_load_map(na, txr->txtag,
605 			    txbuf->map, NMB(na, slot + si));
606 		}
607 #endif /* DEV_NETMAP */
608 		/* Clear the EOP descriptor pointer */
609 		txbuf->eop = NULL;
610         }
611 
612 #ifdef IXGBE_FDIR
613 	/* Set the rate at which we sample packets */
614 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 		txr->atr_sample = atr_sample_rate;
616 #endif
617 
618 	/* Set number of descriptors available */
619 	txr->tx_avail = adapter->num_tx_desc;
620 
621 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 	IXGBE_TX_UNLOCK(txr);
624 }
625 
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634 	struct tx_ring *txr = adapter->tx_rings;
635 
636 	for (int i = 0; i < adapter->num_queues; i++, txr++)
637 		ixgbe_setup_transmit_ring(txr);
638 
639 	return (0);
640 }
641 
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650 	struct tx_ring *txr = adapter->tx_rings;
651 
652 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
653 		IXGBE_TX_LOCK(txr);
654 		ixgbe_free_transmit_buffers(txr);
655 		ixgbe_dma_free(adapter, &txr->txdma);
656 		IXGBE_TX_UNLOCK(txr);
657 		IXGBE_TX_LOCK_DESTROY(txr);
658 	}
659 	free(adapter->tx_rings, M_DEVBUF);
660 }
661 
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670 	struct adapter *adapter = txr->adapter;
671 	struct ixgbe_tx_buf *tx_buffer;
672 	int             i;
673 
674 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675 
676 	if (txr->tx_buffers == NULL)
677 		return;
678 
679 	tx_buffer = txr->tx_buffers;
680 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 		if (tx_buffer->m_head != NULL) {
682 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 			    BUS_DMASYNC_POSTWRITE);
684 			bus_dmamap_unload(txr->txtag,
685 			    tx_buffer->map);
686 			m_freem(tx_buffer->m_head);
687 			tx_buffer->m_head = NULL;
688 			if (tx_buffer->map != NULL) {
689 				bus_dmamap_destroy(txr->txtag,
690 				    tx_buffer->map);
691 				tx_buffer->map = NULL;
692 			}
693 		} else if (tx_buffer->map != NULL) {
694 			bus_dmamap_unload(txr->txtag,
695 			    tx_buffer->map);
696 			bus_dmamap_destroy(txr->txtag,
697 			    tx_buffer->map);
698 			tx_buffer->map = NULL;
699 		}
700 	}
701 #ifdef IXGBE_LEGACY_TX
702 	if (txr->br != NULL)
703 		buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705 	if (txr->tx_buffers != NULL) {
706 		free(txr->tx_buffers, M_DEVBUF);
707 		txr->tx_buffers = NULL;
708 	}
709 	if (txr->txtag != NULL) {
710 		bus_dma_tag_destroy(txr->txtag);
711 		txr->txtag = NULL;
712 	}
713 	return;
714 }
715 
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721 
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726 	struct adapter *adapter = txr->adapter;
727 	struct ixgbe_adv_tx_context_desc *TXD;
728 	struct ether_vlan_header *eh;
729 #ifdef INET
730 	struct ip *ip;
731 #endif
732 #ifdef INET6
733 	struct ip6_hdr *ip6;
734 #endif
735 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736 	int	ehdrlen, ip_hlen = 0;
737 	u16	etype;
738 	u8	ipproto = 0;
739 	int	offload = TRUE;
740 	int	ctxd = txr->next_avail_desc;
741 	u16	vtag = 0;
742 	caddr_t l3d;
743 
744 
745 	/* First check if TSO is to be used */
746 	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
748 
749 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
750 		offload = FALSE;
751 
752 	/* Indicate the whole packet as payload when not doing TSO */
753        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
754 
755 	/* Now ready a context descriptor */
756 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
757 
758 	/*
759 	** In advanced descriptors the vlan tag must
760 	** be placed into the context descriptor. Hence
761 	** we need to make one even if not doing offloads.
762 	*/
763 	if (mp->m_flags & M_VLANTAG) {
764 		vtag = htole16(mp->m_pkthdr.ether_vtag);
765 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
767 		return (0);
768 
769 	/*
770 	 * Determine where frame payload starts.
771 	 * Jump over vlan headers if already present,
772 	 * helpful for QinQ too.
773 	 */
774 	eh = mtod(mp, struct ether_vlan_header *);
775 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776 		etype = ntohs(eh->evl_proto);
777 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
778 	} else {
779 		etype = ntohs(eh->evl_encap_proto);
780 		ehdrlen = ETHER_HDR_LEN;
781 	}
782 
783 	/* Set the ether header length */
784 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
785 
786 	if (offload == FALSE)
787 		goto no_offloads;
788 
789 	/*
790 	 * If the first mbuf only includes the ethernet header, jump to the next one
791 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792 	 * XXX: And assumes the entire IP header is contained in one mbuf
793 	 */
794 	if (mp->m_len == ehdrlen && mp->m_next)
795 		l3d = mtod(mp->m_next, caddr_t);
796 	else
797 		l3d = mtod(mp, caddr_t) + ehdrlen;
798 
799 	switch (etype) {
800 #ifdef INET
801 		case ETHERTYPE_IP:
802 			ip = (struct ip *)(l3d);
803 			ip_hlen = ip->ip_hl << 2;
804 			ipproto = ip->ip_p;
805 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
806 			/* Insert IPv4 checksum into data descriptors */
807 			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
808 				ip->ip_sum = 0;
809 				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
810 			}
811 			break;
812 #endif
813 #ifdef INET6
814 		case ETHERTYPE_IPV6:
815 			ip6 = (struct ip6_hdr *)(l3d);
816 			ip_hlen = sizeof(struct ip6_hdr);
817 			ipproto = ip6->ip6_nxt;
818 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
819 			break;
820 #endif
821 		default:
822 			offload = FALSE;
823 			break;
824 	}
825 
826 	vlan_macip_lens |= ip_hlen;
827 
828 	/* No support for offloads for non-L4 next headers */
829 	switch (ipproto) {
830 		case IPPROTO_TCP:
831 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
832 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
833 			else
834 				offload = false;
835 			break;
836 		case IPPROTO_UDP:
837 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
838 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
839 			else
840 				offload = false;
841 			break;
842 		case IPPROTO_SCTP:
843 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
844 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
845 			else
846 				offload = false;
847 			break;
848 		default:
849 			offload = false;
850 			break;
851 	}
852 
853 	if (offload) /* Insert L4 checksum into data descriptors */
854 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
855 
856 no_offloads:
857 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
858 
859 	/* Now copy bits into descriptor */
860 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
861 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
862 	TXD->seqnum_seed = htole32(0);
863 	TXD->mss_l4len_idx = htole32(0);
864 
865 	/* We've consumed the first desc, adjust counters */
866 	if (++ctxd == txr->num_desc)
867 		ctxd = 0;
868 	txr->next_avail_desc = ctxd;
869 	--txr->tx_avail;
870 
871         return (0);
872 }
873 
874 /**********************************************************************
875  *
876  *  Setup work for hardware segmentation offload (TSO) on
877  *  adapters using advanced tx descriptors
878  *
879  **********************************************************************/
880 static int
881 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
882     u32 *cmd_type_len, u32 *olinfo_status)
883 {
884 	struct ixgbe_adv_tx_context_desc *TXD;
885 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
886 	u32 mss_l4len_idx = 0, paylen;
887 	u16 vtag = 0, eh_type;
888 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
889 	struct ether_vlan_header *eh;
890 #ifdef INET6
891 	struct ip6_hdr *ip6;
892 #endif
893 #ifdef INET
894 	struct ip *ip;
895 #endif
896 	struct tcphdr *th;
897 
898 	/*
899 	 * Determine where frame payload starts.
900 	 * Jump over vlan headers if already present
901 	 */
902 	eh = mtod(mp, struct ether_vlan_header *);
903 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
904 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
905 		eh_type = eh->evl_proto;
906 	} else {
907 		ehdrlen = ETHER_HDR_LEN;
908 		eh_type = eh->evl_encap_proto;
909 	}
910 
911 	switch (ntohs(eh_type)) {
912 #ifdef INET6
913 	case ETHERTYPE_IPV6:
914 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
916 		if (ip6->ip6_nxt != IPPROTO_TCP)
917 			return (ENXIO);
918 		ip_hlen = sizeof(struct ip6_hdr);
919 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
921 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
922 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
923 		break;
924 #endif
925 #ifdef INET
926 	case ETHERTYPE_IP:
927 		ip = (struct ip *)(mp->m_data + ehdrlen);
928 		if (ip->ip_p != IPPROTO_TCP)
929 			return (ENXIO);
930 		ip->ip_sum = 0;
931 		ip_hlen = ip->ip_hl << 2;
932 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
933 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
934 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
935 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
936 		/* Tell transmit desc to also do IPv4 checksum. */
937 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
938 		break;
939 #endif
940 	default:
941 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
942 		    __func__, ntohs(eh_type));
943 		break;
944 	}
945 
946 	ctxd = txr->next_avail_desc;
947 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
948 
949 	tcp_hlen = th->th_off << 2;
950 
951 	/* This is used in the transmit desc in encap */
952 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
953 
954 	/* VLAN MACLEN IPLEN */
955 	if (mp->m_flags & M_VLANTAG) {
956 		vtag = htole16(mp->m_pkthdr.ether_vtag);
957                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
958 	}
959 
960 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
961 	vlan_macip_lens |= ip_hlen;
962 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
963 
964 	/* ADV DTYPE TUCMD */
965 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
967 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
968 
969 	/* MSS L4LEN IDX */
970 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
971 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
972 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
973 
974 	TXD->seqnum_seed = htole32(0);
975 
976 	if (++ctxd == txr->num_desc)
977 		ctxd = 0;
978 
979 	txr->tx_avail--;
980 	txr->next_avail_desc = ctxd;
981 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
982 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
983 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
984 	++txr->tso_tx;
985 	return (0);
986 }
987 
988 
989 /**********************************************************************
990  *
991  *  Examine each tx_buffer in the used queue. If the hardware is done
992  *  processing the packet then free associated resources. The
993  *  tx_buffer is put back on the free queue.
994  *
995  **********************************************************************/
996 void
997 ixgbe_txeof(struct tx_ring *txr)
998 {
999 	struct adapter		*adapter = txr->adapter;
1000 #ifdef DEV_NETMAP
1001 	struct ifnet		*ifp = adapter->ifp;
1002 #endif
1003 	u32			work, processed = 0;
1004 	u32			limit = adapter->tx_process_limit;
1005 	struct ixgbe_tx_buf	*buf;
1006 	union ixgbe_adv_tx_desc *txd;
1007 
1008 	mtx_assert(&txr->tx_mtx, MA_OWNED);
1009 
1010 #ifdef DEV_NETMAP
1011 	if (ifp->if_capenable & IFCAP_NETMAP) {
1012 		struct netmap_adapter *na = NA(ifp);
1013 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1014 		txd = txr->tx_base;
1015 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1016 		    BUS_DMASYNC_POSTREAD);
1017 		/*
1018 		 * In netmap mode, all the work is done in the context
1019 		 * of the client thread. Interrupt handlers only wake up
1020 		 * clients, which may be sleeping on individual rings
1021 		 * or on a global resource for all rings.
1022 		 * To implement tx interrupt mitigation, we wake up the client
1023 		 * thread roughly every half ring, even if the NIC interrupts
1024 		 * more frequently. This is implemented as follows:
1025 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1026 		 *   the slot that should wake up the thread (nkr_num_slots
1027 		 *   means the user thread should not be woken up);
1028 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1029 		 *   or the slot has the DD bit set.
1030 		 */
1031 		if (!netmap_mitigate ||
1032 		    (kring->nr_kflags < kring->nkr_num_slots &&
1033 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1034 			netmap_tx_irq(ifp, txr->me);
1035 		}
1036 		return;
1037 	}
1038 #endif /* DEV_NETMAP */
1039 
1040 	if (txr->tx_avail == txr->num_desc) {
1041 		txr->busy = 0;
1042 		return;
1043 	}
1044 
1045 	/* Get work starting point */
1046 	work = txr->next_to_clean;
1047 	buf = &txr->tx_buffers[work];
1048 	txd = &txr->tx_base[work];
1049 	work -= txr->num_desc; /* The distance to ring end */
1050         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1051             BUS_DMASYNC_POSTREAD);
1052 
1053 	do {
1054 		union ixgbe_adv_tx_desc *eop = buf->eop;
1055 		if (eop == NULL) /* No work */
1056 			break;
1057 
1058 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1059 			break;	/* I/O not complete */
1060 
1061 		if (buf->m_head) {
1062 			txr->bytes +=
1063 			    buf->m_head->m_pkthdr.len;
1064 			bus_dmamap_sync(txr->txtag,
1065 			    buf->map,
1066 			    BUS_DMASYNC_POSTWRITE);
1067 			bus_dmamap_unload(txr->txtag,
1068 			    buf->map);
1069 			m_freem(buf->m_head);
1070 			buf->m_head = NULL;
1071 		}
1072 		buf->eop = NULL;
1073 		++txr->tx_avail;
1074 
1075 		/* We clean the range if multi segment */
1076 		while (txd != eop) {
1077 			++txd;
1078 			++buf;
1079 			++work;
1080 			/* wrap the ring? */
1081 			if (__predict_false(!work)) {
1082 				work -= txr->num_desc;
1083 				buf = txr->tx_buffers;
1084 				txd = txr->tx_base;
1085 			}
1086 			if (buf->m_head) {
1087 				txr->bytes +=
1088 				    buf->m_head->m_pkthdr.len;
1089 				bus_dmamap_sync(txr->txtag,
1090 				    buf->map,
1091 				    BUS_DMASYNC_POSTWRITE);
1092 				bus_dmamap_unload(txr->txtag,
1093 				    buf->map);
1094 				m_freem(buf->m_head);
1095 				buf->m_head = NULL;
1096 			}
1097 			++txr->tx_avail;
1098 			buf->eop = NULL;
1099 
1100 		}
1101 		++txr->packets;
1102 		++processed;
1103 
1104 		/* Try the next packet */
1105 		++txd;
1106 		++buf;
1107 		++work;
1108 		/* reset with a wrap */
1109 		if (__predict_false(!work)) {
1110 			work -= txr->num_desc;
1111 			buf = txr->tx_buffers;
1112 			txd = txr->tx_base;
1113 		}
1114 		prefetch(txd);
1115 	} while (__predict_true(--limit));
1116 
1117 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1118 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1119 
1120 	work += txr->num_desc;
1121 	txr->next_to_clean = work;
1122 
1123 	/*
1124 	** Queue Hang detection, we know there's
1125 	** work outstanding or the first return
1126 	** would have been taken, so increment busy
1127 	** if nothing managed to get cleaned, then
1128 	** in local_timer it will be checked and
1129 	** marked as HUNG if it exceeds a MAX attempt.
1130 	*/
1131 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1132 		++txr->busy;
1133 	/*
1134 	** If anything gets cleaned we reset state to 1,
1135 	** note this will turn off HUNG if its set.
1136 	*/
1137 	if (processed)
1138 		txr->busy = 1;
1139 
1140 	if (txr->tx_avail == txr->num_desc)
1141 		txr->busy = 0;
1142 
1143 	return;
1144 }
1145 
1146 
1147 #ifdef IXGBE_FDIR
1148 /*
1149 ** This routine parses packet headers so that Flow
1150 ** Director can make a hashed filter table entry
1151 ** allowing traffic flows to be identified and kept
1152 ** on the same cpu.  This would be a performance
1153 ** hit, but we only do it at IXGBE_FDIR_RATE of
1154 ** packets.
1155 */
1156 static void
1157 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1158 {
1159 	struct adapter			*adapter = txr->adapter;
1160 	struct ix_queue			*que;
1161 	struct ip			*ip;
1162 	struct tcphdr			*th;
1163 	struct udphdr			*uh;
1164 	struct ether_vlan_header	*eh;
1165 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1166 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1167 	int  				ehdrlen, ip_hlen;
1168 	u16				etype;
1169 
1170 	eh = mtod(mp, struct ether_vlan_header *);
1171 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1172 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1173 		etype = eh->evl_proto;
1174 	} else {
1175 		ehdrlen = ETHER_HDR_LEN;
1176 		etype = eh->evl_encap_proto;
1177 	}
1178 
1179 	/* Only handling IPv4 */
1180 	if (etype != htons(ETHERTYPE_IP))
1181 		return;
1182 
1183 	ip = (struct ip *)(mp->m_data + ehdrlen);
1184 	ip_hlen = ip->ip_hl << 2;
1185 
1186 	/* check if we're UDP or TCP */
1187 	switch (ip->ip_p) {
1188 	case IPPROTO_TCP:
1189 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1190 		/* src and dst are inverted */
1191 		common.port.dst ^= th->th_sport;
1192 		common.port.src ^= th->th_dport;
1193 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1194 		break;
1195 	case IPPROTO_UDP:
1196 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1197 		/* src and dst are inverted */
1198 		common.port.dst ^= uh->uh_sport;
1199 		common.port.src ^= uh->uh_dport;
1200 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1201 		break;
1202 	default:
1203 		return;
1204 	}
1205 
1206 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1207 	if (mp->m_pkthdr.ether_vtag)
1208 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1209 	else
1210 		common.flex_bytes ^= etype;
1211 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1212 
1213 	que = &adapter->queues[txr->me];
1214 	/*
1215 	** This assumes the Rx queue and Tx
1216 	** queue are bound to the same CPU
1217 	*/
1218 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1219 	    input, common, que->msix);
1220 }
1221 #endif /* IXGBE_FDIR */
1222 
1223 /*
1224 ** Used to detect a descriptor that has
1225 ** been merged by Hardware RSC.
1226 */
1227 static inline u32
1228 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1229 {
1230 	return (le32toh(rx->wb.lower.lo_dword.data) &
1231 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1232 }
1233 
1234 /*********************************************************************
1235  *
1236  *  Initialize Hardware RSC (LRO) feature on 82599
1237  *  for an RX ring, this is toggled by the LRO capability
1238  *  even though it is transparent to the stack.
1239  *
1240  *  NOTE: since this HW feature only works with IPV4 and
1241  *        our testing has shown soft LRO to be as effective
1242  *        I have decided to disable this by default.
1243  *
1244  **********************************************************************/
1245 static void
1246 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1247 {
1248 	struct	adapter 	*adapter = rxr->adapter;
1249 	struct	ixgbe_hw	*hw = &adapter->hw;
1250 	u32			rscctrl, rdrxctl;
1251 
1252 	/* If turning LRO/RSC off we need to disable it */
1253 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1254 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1255 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1256 		return;
1257 	}
1258 
1259 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1260 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1261 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1262 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1263 #endif /* DEV_NETMAP */
1264 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1265 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1266 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1267 
1268 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1269 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1270 	/*
1271 	** Limit the total number of descriptors that
1272 	** can be combined, so it does not exceed 64K
1273 	*/
1274 	if (rxr->mbuf_sz == MCLBYTES)
1275 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1276 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1277 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1278 	else if (rxr->mbuf_sz == MJUM9BYTES)
1279 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1280 	else  /* Using 16K cluster */
1281 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1282 
1283 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1284 
1285 	/* Enable TCP header recognition */
1286 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1287 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1288 	    IXGBE_PSRTYPE_TCPHDR));
1289 
1290 	/* Disable RSC for ACK packets */
1291 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1292 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1293 
1294 	rxr->hw_rsc = TRUE;
1295 }
1296 
1297 /*********************************************************************
1298  *
1299  *  Refresh mbuf buffers for RX descriptor rings
1300  *   - now keeps its own state so discards due to resource
1301  *     exhaustion are unnecessary, if an mbuf cannot be obtained
1302  *     it just returns, keeping its placeholder, thus it can simply
1303  *     be recalled to try again.
1304  *
1305  **********************************************************************/
1306 static void
1307 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1308 {
1309 	struct adapter		*adapter = rxr->adapter;
1310 	bus_dma_segment_t	seg[1];
1311 	struct ixgbe_rx_buf	*rxbuf;
1312 	struct mbuf		*mp;
1313 	int			i, j, nsegs, error;
1314 	bool			refreshed = FALSE;
1315 
1316 	i = j = rxr->next_to_refresh;
1317 	/* Control the loop with one beyond */
1318 	if (++j == rxr->num_desc)
1319 		j = 0;
1320 
1321 	while (j != limit) {
1322 		rxbuf = &rxr->rx_buffers[i];
1323 		if (rxbuf->buf == NULL) {
1324 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1325 			    M_PKTHDR, rxr->mbuf_sz);
1326 			if (mp == NULL)
1327 				goto update;
1328 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1329 				m_adj(mp, ETHER_ALIGN);
1330 		} else
1331 			mp = rxbuf->buf;
1332 
1333 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1334 
1335 		/* If we're dealing with an mbuf that was copied rather
1336 		 * than replaced, there's no need to go through busdma.
1337 		 */
1338 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1339 			/* Get the memory mapping */
1340 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1341 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1342 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1343 			if (error != 0) {
1344 				printf("Refresh mbufs: payload dmamap load"
1345 				    " failure - %d\n", error);
1346 				m_free(mp);
1347 				rxbuf->buf = NULL;
1348 				goto update;
1349 			}
1350 			rxbuf->buf = mp;
1351 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1352 			    BUS_DMASYNC_PREREAD);
1353 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1354 			    htole64(seg[0].ds_addr);
1355 		} else {
1356 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1357 			rxbuf->flags &= ~IXGBE_RX_COPY;
1358 		}
1359 
1360 		refreshed = TRUE;
1361 		/* Next is precalculated */
1362 		i = j;
1363 		rxr->next_to_refresh = i;
1364 		if (++j == rxr->num_desc)
1365 			j = 0;
1366 	}
1367 update:
1368 	if (refreshed) /* Update hardware tail index */
1369 		IXGBE_WRITE_REG(&adapter->hw,
1370 		    rxr->tail, rxr->next_to_refresh);
1371 	return;
1372 }
1373 
1374 /*********************************************************************
1375  *
1376  *  Allocate memory for rx_buffer structures. Since we use one
1377  *  rx_buffer per received packet, the maximum number of rx_buffer's
1378  *  that we'll need is equal to the number of receive descriptors
1379  *  that we've allocated.
1380  *
1381  **********************************************************************/
1382 int
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1384 {
1385 	struct	adapter 	*adapter = rxr->adapter;
1386 	device_t 		dev = adapter->dev;
1387 	struct ixgbe_rx_buf 	*rxbuf;
1388 	int             	bsize, error;
1389 
1390 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391 	if (!(rxr->rx_buffers =
1392 	    (struct ixgbe_rx_buf *) malloc(bsize,
1393 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1395 		error = ENOMEM;
1396 		goto fail;
1397 	}
1398 
1399 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1400 				   1, 0,	/* alignment, bounds */
1401 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1402 				   BUS_SPACE_MAXADDR,	/* highaddr */
1403 				   NULL, NULL,		/* filter, filterarg */
1404 				   MJUM16BYTES,		/* maxsize */
1405 				   1,			/* nsegments */
1406 				   MJUM16BYTES,		/* maxsegsize */
1407 				   0,			/* flags */
1408 				   NULL,		/* lockfunc */
1409 				   NULL,		/* lockfuncarg */
1410 				   &rxr->ptag))) {
1411 		device_printf(dev, "Unable to create RX DMA tag\n");
1412 		goto fail;
1413 	}
1414 
1415 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1416 		rxbuf = &rxr->rx_buffers[i];
1417 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1418 		if (error) {
1419 			device_printf(dev, "Unable to create RX dma map\n");
1420 			goto fail;
1421 		}
1422 	}
1423 
1424 	return (0);
1425 
1426 fail:
1427 	/* Frees all, but can handle partial completion */
1428 	ixgbe_free_receive_structures(adapter);
1429 	return (error);
1430 }
1431 
1432 static void
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1434 {
1435 
1436 	for (int i = 0; i < rxr->num_desc; i++) {
1437 		ixgbe_rx_discard(rxr, i);
1438 	}
1439 }
1440 
1441 /*********************************************************************
1442  *
1443  *  Initialize a receive ring and its buffers.
1444  *
1445  **********************************************************************/
1446 static int
1447 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1448 {
1449 	struct	adapter 	*adapter;
1450 	struct ifnet		*ifp;
1451 	device_t		dev;
1452 	struct ixgbe_rx_buf	*rxbuf;
1453 	bus_dma_segment_t	seg[1];
1454 	struct lro_ctrl		*lro = &rxr->lro;
1455 	int			rsize, nsegs, error = 0;
1456 #ifdef DEV_NETMAP
1457 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1458 	struct netmap_slot *slot;
1459 #endif /* DEV_NETMAP */
1460 
1461 	adapter = rxr->adapter;
1462 	ifp = adapter->ifp;
1463 	dev = adapter->dev;
1464 
1465 	/* Clear the ring contents */
1466 	IXGBE_RX_LOCK(rxr);
1467 #ifdef DEV_NETMAP
1468 	/* same as in ixgbe_setup_transmit_ring() */
1469 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1470 #endif /* DEV_NETMAP */
1471 	rsize = roundup2(adapter->num_rx_desc *
1472 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1473 	bzero((void *)rxr->rx_base, rsize);
1474 	/* Cache the size */
1475 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1476 
1477 	/* Free current RX buffer structs and their mbufs */
1478 	ixgbe_free_receive_ring(rxr);
1479 
1480 	/* Now replenish the mbufs */
1481 	for (int j = 0; j != rxr->num_desc; ++j) {
1482 		struct mbuf	*mp;
1483 
1484 		rxbuf = &rxr->rx_buffers[j];
1485 #ifdef DEV_NETMAP
1486 		/*
1487 		 * In netmap mode, fill the map and set the buffer
1488 		 * address in the NIC ring, considering the offset
1489 		 * between the netmap and NIC rings (see comment in
1490 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1491 		 * an mbuf, so end the block with a continue;
1492 		 */
1493 		if (slot) {
1494 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1495 			uint64_t paddr;
1496 			void *addr;
1497 
1498 			addr = PNMB(na, slot + sj, &paddr);
1499 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1500 			/* Update descriptor and the cached value */
1501 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1502 			rxbuf->addr = htole64(paddr);
1503 			continue;
1504 		}
1505 #endif /* DEV_NETMAP */
1506 		rxbuf->flags = 0;
1507 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1508 		    M_PKTHDR, adapter->rx_mbuf_sz);
1509 		if (rxbuf->buf == NULL) {
1510 			error = ENOBUFS;
1511                         goto fail;
1512 		}
1513 		mp = rxbuf->buf;
1514 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1515 		/* Get the memory mapping */
1516 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1517 		    rxbuf->pmap, mp, seg,
1518 		    &nsegs, BUS_DMA_NOWAIT);
1519 		if (error != 0)
1520                         goto fail;
1521 		bus_dmamap_sync(rxr->ptag,
1522 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1523 		/* Update the descriptor and the cached value */
1524 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1525 		rxbuf->addr = htole64(seg[0].ds_addr);
1526 	}
1527 
1528 
1529 	/* Setup our descriptor indices */
1530 	rxr->next_to_check = 0;
1531 	rxr->next_to_refresh = 0;
1532 	rxr->lro_enabled = FALSE;
1533 	rxr->rx_copies = 0;
1534 	rxr->rx_bytes = 0;
1535 	rxr->vtag_strip = FALSE;
1536 
1537 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1538 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1539 
1540 	/*
1541 	** Now set up the LRO interface:
1542 	*/
1543 	if (ixgbe_rsc_enable)
1544 		ixgbe_setup_hw_rsc(rxr);
1545 	else if (ifp->if_capenable & IFCAP_LRO) {
1546 		int err = tcp_lro_init(lro);
1547 		if (err) {
1548 			device_printf(dev, "LRO Initialization failed!\n");
1549 			goto fail;
1550 		}
1551 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1552 		rxr->lro_enabled = TRUE;
1553 		lro->ifp = adapter->ifp;
1554 	}
1555 
1556 	IXGBE_RX_UNLOCK(rxr);
1557 	return (0);
1558 
1559 fail:
1560 	ixgbe_free_receive_ring(rxr);
1561 	IXGBE_RX_UNLOCK(rxr);
1562 	return (error);
1563 }
1564 
1565 /*********************************************************************
1566  *
1567  *  Initialize all receive rings.
1568  *
1569  **********************************************************************/
1570 int
1571 ixgbe_setup_receive_structures(struct adapter *adapter)
1572 {
1573 	struct rx_ring *rxr = adapter->rx_rings;
1574 	int j;
1575 
1576 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1577 		if (ixgbe_setup_receive_ring(rxr))
1578 			goto fail;
1579 
1580 	return (0);
1581 fail:
1582 	/*
1583 	 * Free RX buffers allocated so far, we will only handle
1584 	 * the rings that completed, the failing case will have
1585 	 * cleaned up for itself. 'j' failed, so its the terminus.
1586 	 */
1587 	for (int i = 0; i < j; ++i) {
1588 		rxr = &adapter->rx_rings[i];
1589 		IXGBE_RX_LOCK(rxr);
1590 		ixgbe_free_receive_ring(rxr);
1591 		IXGBE_RX_UNLOCK(rxr);
1592 	}
1593 
1594 	return (ENOBUFS);
1595 }
1596 
1597 
1598 /*********************************************************************
1599  *
1600  *  Free all receive rings.
1601  *
1602  **********************************************************************/
1603 void
1604 ixgbe_free_receive_structures(struct adapter *adapter)
1605 {
1606 	struct rx_ring *rxr = adapter->rx_rings;
1607 
1608 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1609 
1610 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1611 		struct lro_ctrl		*lro = &rxr->lro;
1612 		ixgbe_free_receive_buffers(rxr);
1613 		/* Free LRO memory */
1614 		tcp_lro_free(lro);
1615 		/* Free the ring memory as well */
1616 		ixgbe_dma_free(adapter, &rxr->rxdma);
1617 	}
1618 
1619 	free(adapter->rx_rings, M_DEVBUF);
1620 }
1621 
1622 
1623 /*********************************************************************
1624  *
1625  *  Free receive ring data structures
1626  *
1627  **********************************************************************/
1628 void
1629 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1630 {
1631 	struct adapter		*adapter = rxr->adapter;
1632 	struct ixgbe_rx_buf	*rxbuf;
1633 
1634 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1635 
1636 	/* Cleanup any existing buffers */
1637 	if (rxr->rx_buffers != NULL) {
1638 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1639 			rxbuf = &rxr->rx_buffers[i];
1640 			ixgbe_rx_discard(rxr, i);
1641 			if (rxbuf->pmap != NULL) {
1642 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1643 				rxbuf->pmap = NULL;
1644 			}
1645 		}
1646 		if (rxr->rx_buffers != NULL) {
1647 			free(rxr->rx_buffers, M_DEVBUF);
1648 			rxr->rx_buffers = NULL;
1649 		}
1650 	}
1651 
1652 	if (rxr->ptag != NULL) {
1653 		bus_dma_tag_destroy(rxr->ptag);
1654 		rxr->ptag = NULL;
1655 	}
1656 
1657 	return;
1658 }
1659 
1660 static __inline void
1661 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1662 {
1663 
1664         /*
1665          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1666          * should be computed by hardware. Also it should not have VLAN tag in
1667          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1668          */
1669         if (rxr->lro_enabled &&
1670             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1671             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1672             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1673             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1674             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1675             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1676             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1677             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1678                 /*
1679                  * Send to the stack if:
1680                  **  - LRO not enabled, or
1681                  **  - no LRO resources, or
1682                  **  - lro enqueue fails
1683                  */
1684                 if (rxr->lro.lro_cnt != 0)
1685                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1686                                 return;
1687         }
1688 	IXGBE_RX_UNLOCK(rxr);
1689         (*ifp->if_input)(ifp, m);
1690 	IXGBE_RX_LOCK(rxr);
1691 }
1692 
1693 static __inline void
1694 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1695 {
1696 	struct ixgbe_rx_buf	*rbuf;
1697 
1698 	rbuf = &rxr->rx_buffers[i];
1699 
1700 
1701 	/*
1702 	** With advanced descriptors the writeback
1703 	** clobbers the buffer addrs, so its easier
1704 	** to just free the existing mbufs and take
1705 	** the normal refresh path to get new buffers
1706 	** and mapping.
1707 	*/
1708 
1709 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1710 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1711 		m_freem(rbuf->fmp);
1712 		rbuf->fmp = NULL;
1713 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1714 	} else if (rbuf->buf) {
1715 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1716 		m_free(rbuf->buf);
1717 		rbuf->buf = NULL;
1718 	}
1719 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1720 
1721 	rbuf->flags = 0;
1722 
1723 	return;
1724 }
1725 
1726 
1727 /*********************************************************************
1728  *
1729  *  This routine executes in interrupt context. It replenishes
1730  *  the mbufs in the descriptor and sends data which has been
1731  *  dma'ed into host memory to upper layer.
1732  *
1733  *  Return TRUE for more work, FALSE for all clean.
1734  *********************************************************************/
1735 bool
1736 ixgbe_rxeof(struct ix_queue *que)
1737 {
1738 	struct adapter		*adapter = que->adapter;
1739 	struct rx_ring		*rxr = que->rxr;
1740 	struct ifnet		*ifp = adapter->ifp;
1741 	struct lro_ctrl		*lro = &rxr->lro;
1742 	int			i, nextp, processed = 0;
1743 	u32			staterr = 0;
1744 	u32			count = adapter->rx_process_limit;
1745 	union ixgbe_adv_rx_desc	*cur;
1746 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1747 	u16			pkt_info;
1748 
1749 	IXGBE_RX_LOCK(rxr);
1750 
1751 #ifdef DEV_NETMAP
1752 	/* Same as the txeof routine: wakeup clients on intr. */
1753 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1754 		IXGBE_RX_UNLOCK(rxr);
1755 		return (FALSE);
1756 	}
1757 #endif /* DEV_NETMAP */
1758 
1759 	for (i = rxr->next_to_check; count != 0;) {
1760 		struct mbuf	*sendmp, *mp;
1761 		u32		rsc, ptype;
1762 		u16		len;
1763 		u16		vtag = 0;
1764 		bool		eop;
1765 
1766 		/* Sync the ring. */
1767 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1768 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1769 
1770 		cur = &rxr->rx_base[i];
1771 		staterr = le32toh(cur->wb.upper.status_error);
1772 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1773 
1774 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1775 			break;
1776 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1777 			break;
1778 
1779 		count--;
1780 		sendmp = NULL;
1781 		nbuf = NULL;
1782 		rsc = 0;
1783 		cur->wb.upper.status_error = 0;
1784 		rbuf = &rxr->rx_buffers[i];
1785 		mp = rbuf->buf;
1786 
1787 		len = le16toh(cur->wb.upper.length);
1788 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1789 		    IXGBE_RXDADV_PKTTYPE_MASK;
1790 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1791 
1792 		/* Make sure bad packets are discarded */
1793 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1794 #if __FreeBSD_version >= 1100036
1795 			if (IXGBE_IS_VF(adapter))
1796 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1797 #endif
1798 			rxr->rx_discarded++;
1799 			ixgbe_rx_discard(rxr, i);
1800 			goto next_desc;
1801 		}
1802 
1803 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1804 
1805 		/*
1806 		** On 82599 which supports a hardware
1807 		** LRO (called HW RSC), packets need
1808 		** not be fragmented across sequential
1809 		** descriptors, rather the next descriptor
1810 		** is indicated in bits of the descriptor.
1811 		** This also means that we might proceses
1812 		** more than one packet at a time, something
1813 		** that has never been true before, it
1814 		** required eliminating global chain pointers
1815 		** in favor of what we are doing here.  -jfv
1816 		*/
1817 		if (!eop) {
1818 			/*
1819 			** Figure out the next descriptor
1820 			** of this frame.
1821 			*/
1822 			if (rxr->hw_rsc == TRUE) {
1823 				rsc = ixgbe_rsc_count(cur);
1824 				rxr->rsc_num += (rsc - 1);
1825 			}
1826 			if (rsc) { /* Get hardware index */
1827 				nextp = ((staterr &
1828 				    IXGBE_RXDADV_NEXTP_MASK) >>
1829 				    IXGBE_RXDADV_NEXTP_SHIFT);
1830 			} else { /* Just sequential */
1831 				nextp = i + 1;
1832 				if (nextp == adapter->num_rx_desc)
1833 					nextp = 0;
1834 			}
1835 			nbuf = &rxr->rx_buffers[nextp];
1836 			prefetch(nbuf);
1837 		}
1838 		/*
1839 		** Rather than using the fmp/lmp global pointers
1840 		** we now keep the head of a packet chain in the
1841 		** buffer struct and pass this along from one
1842 		** descriptor to the next, until we get EOP.
1843 		*/
1844 		mp->m_len = len;
1845 		/*
1846 		** See if there is a stored head
1847 		** that determines what we are
1848 		*/
1849 		sendmp = rbuf->fmp;
1850 		if (sendmp != NULL) {  /* secondary frag */
1851 			rbuf->buf = rbuf->fmp = NULL;
1852 			mp->m_flags &= ~M_PKTHDR;
1853 			sendmp->m_pkthdr.len += mp->m_len;
1854 		} else {
1855 			/*
1856 			 * Optimize.  This might be a small packet,
1857 			 * maybe just a TCP ACK.  Do a fast copy that
1858 			 * is cache aligned into a new mbuf, and
1859 			 * leave the old mbuf+cluster for re-use.
1860 			 */
1861 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1862 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1863 				if (sendmp != NULL) {
1864 					sendmp->m_data +=
1865 					    IXGBE_RX_COPY_ALIGN;
1866 					ixgbe_bcopy(mp->m_data,
1867 					    sendmp->m_data, len);
1868 					sendmp->m_len = len;
1869 					rxr->rx_copies++;
1870 					rbuf->flags |= IXGBE_RX_COPY;
1871 				}
1872 			}
1873 			if (sendmp == NULL) {
1874 				rbuf->buf = rbuf->fmp = NULL;
1875 				sendmp = mp;
1876 			}
1877 
1878 			/* first desc of a non-ps chain */
1879 			sendmp->m_flags |= M_PKTHDR;
1880 			sendmp->m_pkthdr.len = mp->m_len;
1881 		}
1882 		++processed;
1883 
1884 		/* Pass the head pointer on */
1885 		if (eop == 0) {
1886 			nbuf->fmp = sendmp;
1887 			sendmp = NULL;
1888 			mp->m_next = nbuf->buf;
1889 		} else { /* Sending this frame */
1890 			sendmp->m_pkthdr.rcvif = ifp;
1891 			rxr->rx_packets++;
1892 			/* capture data for AIM */
1893 			rxr->bytes += sendmp->m_pkthdr.len;
1894 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1895 			/* Process vlan info */
1896 			if ((rxr->vtag_strip) &&
1897 			    (staterr & IXGBE_RXD_STAT_VP))
1898 				vtag = le16toh(cur->wb.upper.vlan);
1899 			if (vtag) {
1900 				sendmp->m_pkthdr.ether_vtag = vtag;
1901 				sendmp->m_flags |= M_VLANTAG;
1902 			}
1903 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1904 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1905 
1906                         /*
1907                          * In case of multiqueue, we have RXCSUM.PCSD bit set
1908                          * and never cleared. This means we have RSS hash
1909                          * available to be used.
1910                          */
1911                         if (adapter->num_queues > 1) {
1912                                 sendmp->m_pkthdr.flowid =
1913                                     le32toh(cur->wb.lower.hi_dword.rss);
1914                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1915                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1916                                         M_HASHTYPE_SET(sendmp,
1917                                             M_HASHTYPE_RSS_IPV4);
1918                                         break;
1919                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1920                                         M_HASHTYPE_SET(sendmp,
1921                                             M_HASHTYPE_RSS_TCP_IPV4);
1922                                         break;
1923                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1924                                         M_HASHTYPE_SET(sendmp,
1925                                             M_HASHTYPE_RSS_IPV6);
1926                                         break;
1927                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928                                         M_HASHTYPE_SET(sendmp,
1929                                             M_HASHTYPE_RSS_TCP_IPV6);
1930                                         break;
1931                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1932                                         M_HASHTYPE_SET(sendmp,
1933                                             M_HASHTYPE_RSS_IPV6_EX);
1934                                         break;
1935                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1936                                         M_HASHTYPE_SET(sendmp,
1937                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1938                                         break;
1939 #if __FreeBSD_version > 1100000
1940                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1941                                         M_HASHTYPE_SET(sendmp,
1942                                             M_HASHTYPE_RSS_UDP_IPV4);
1943                                         break;
1944                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1945                                         M_HASHTYPE_SET(sendmp,
1946                                             M_HASHTYPE_RSS_UDP_IPV6);
1947                                         break;
1948                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1949                                         M_HASHTYPE_SET(sendmp,
1950                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1951                                         break;
1952 #endif
1953                                     default:
1954                                         M_HASHTYPE_SET(sendmp,
1955                                             M_HASHTYPE_OPAQUE_HASH);
1956                                 }
1957                         } else {
1958                                 sendmp->m_pkthdr.flowid = que->msix;
1959 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1960 			}
1961 		}
1962 next_desc:
1963 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1964 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1965 
1966 		/* Advance our pointers to the next descriptor. */
1967 		if (++i == rxr->num_desc)
1968 			i = 0;
1969 
1970 		/* Now send to the stack or do LRO */
1971 		if (sendmp != NULL) {
1972 			rxr->next_to_check = i;
1973 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1974 			i = rxr->next_to_check;
1975 		}
1976 
1977                /* Every 8 descriptors we go to refresh mbufs */
1978 		if (processed == 8) {
1979 			ixgbe_refresh_mbufs(rxr, i);
1980 			processed = 0;
1981 		}
1982 	}
1983 
1984 	/* Refresh any remaining buf structs */
1985 	if (ixgbe_rx_unrefreshed(rxr))
1986 		ixgbe_refresh_mbufs(rxr, i);
1987 
1988 	rxr->next_to_check = i;
1989 
1990 	/*
1991 	 * Flush any outstanding LRO work
1992 	 */
1993 	tcp_lro_flush_all(lro);
1994 
1995 	IXGBE_RX_UNLOCK(rxr);
1996 
1997 	/*
1998 	** Still have cleaning to do?
1999 	*/
2000 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2001 		return (TRUE);
2002 	else
2003 		return (FALSE);
2004 }
2005 
2006 
2007 /*********************************************************************
2008  *
2009  *  Verify that the hardware indicated that the checksum is valid.
2010  *  Inform the stack about the status of checksum so that stack
2011  *  doesn't spend time verifying the checksum.
2012  *
2013  *********************************************************************/
2014 static void
2015 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2016 {
2017 	u16	status = (u16) staterr;
2018 	u8	errors = (u8) (staterr >> 24);
2019 	bool	sctp = false;
2020 
2021 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2022 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2023 		sctp = true;
2024 
2025 	/* IPv4 checksum */
2026 	if (status & IXGBE_RXD_STAT_IPCS) {
2027 		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2028 		/* IP Checksum Good */
2029 		if (!(errors & IXGBE_RXD_ERR_IPE))
2030 			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2031 	}
2032 	/* TCP/UDP/SCTP checksum */
2033 	if (status & IXGBE_RXD_STAT_L4CS) {
2034 		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2035 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2036 			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2037 			if (!sctp)
2038 				mp->m_pkthdr.csum_data = htons(0xffff);
2039 		}
2040 	}
2041 }
2042 
2043 /********************************************************************
2044  * Manage DMA'able memory.
2045  *******************************************************************/
2046 static void
2047 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2048 {
2049 	if (error)
2050 		return;
2051 	*(bus_addr_t *) arg = segs->ds_addr;
2052 	return;
2053 }
2054 
2055 int
2056 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2057 		struct ixgbe_dma_alloc *dma, int mapflags)
2058 {
2059 	device_t dev = adapter->dev;
2060 	int             r;
2061 
2062 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2063 			       DBA_ALIGN, 0,	/* alignment, bounds */
2064 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2065 			       BUS_SPACE_MAXADDR,	/* highaddr */
2066 			       NULL, NULL,	/* filter, filterarg */
2067 			       size,	/* maxsize */
2068 			       1,	/* nsegments */
2069 			       size,	/* maxsegsize */
2070 			       BUS_DMA_ALLOCNOW,	/* flags */
2071 			       NULL,	/* lockfunc */
2072 			       NULL,	/* lockfuncarg */
2073 			       &dma->dma_tag);
2074 	if (r != 0) {
2075 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2076 		       "error %u\n", r);
2077 		goto fail_0;
2078 	}
2079 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2080 			     BUS_DMA_NOWAIT, &dma->dma_map);
2081 	if (r != 0) {
2082 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2083 		       "error %u\n", r);
2084 		goto fail_1;
2085 	}
2086 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2087 			    size,
2088 			    ixgbe_dmamap_cb,
2089 			    &dma->dma_paddr,
2090 			    mapflags | BUS_DMA_NOWAIT);
2091 	if (r != 0) {
2092 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2093 		       "error %u\n", r);
2094 		goto fail_2;
2095 	}
2096 	dma->dma_size = size;
2097 	return (0);
2098 fail_2:
2099 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2100 fail_1:
2101 	bus_dma_tag_destroy(dma->dma_tag);
2102 fail_0:
2103 	dma->dma_tag = NULL;
2104 	return (r);
2105 }
2106 
2107 void
2108 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2109 {
2110 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2111 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2112 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2113 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2114 	bus_dma_tag_destroy(dma->dma_tag);
2115 }
2116 
2117 
2118 /*********************************************************************
2119  *
2120  *  Allocate memory for the transmit and receive rings, and then
2121  *  the descriptors associated with each, called only once at attach.
2122  *
2123  **********************************************************************/
2124 int
2125 ixgbe_allocate_queues(struct adapter *adapter)
2126 {
2127 	device_t	dev = adapter->dev;
2128 	struct ix_queue	*que;
2129 	struct tx_ring	*txr;
2130 	struct rx_ring	*rxr;
2131 	int rsize, tsize, error = IXGBE_SUCCESS;
2132 	int txconf = 0, rxconf = 0;
2133 #ifdef PCI_IOV
2134 	enum ixgbe_iov_mode iov_mode;
2135 #endif
2136 
2137         /* First allocate the top level queue structs */
2138         if (!(adapter->queues =
2139             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2140             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2141                 device_printf(dev, "Unable to allocate queue memory\n");
2142                 error = ENOMEM;
2143                 goto fail;
2144         }
2145 
2146 	/* First allocate the TX ring struct memory */
2147 	if (!(adapter->tx_rings =
2148 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2149 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2150 		device_printf(dev, "Unable to allocate TX ring memory\n");
2151 		error = ENOMEM;
2152 		goto tx_fail;
2153 	}
2154 
2155 	/* Next allocate the RX */
2156 	if (!(adapter->rx_rings =
2157 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2158 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2159 		device_printf(dev, "Unable to allocate RX ring memory\n");
2160 		error = ENOMEM;
2161 		goto rx_fail;
2162 	}
2163 
2164 	/* For the ring itself */
2165 	tsize = roundup2(adapter->num_tx_desc *
2166 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2167 
2168 #ifdef PCI_IOV
2169 	iov_mode = ixgbe_get_iov_mode(adapter);
2170 	adapter->pool = ixgbe_max_vfs(iov_mode);
2171 #else
2172 	adapter->pool = 0;
2173 #endif
2174 	/*
2175 	 * Now set up the TX queues, txconf is needed to handle the
2176 	 * possibility that things fail midcourse and we need to
2177 	 * undo memory gracefully
2178 	 */
2179 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2180 		/* Set up some basics */
2181 		txr = &adapter->tx_rings[i];
2182 		txr->adapter = adapter;
2183 #ifdef PCI_IOV
2184 		txr->me = ixgbe_pf_que_index(iov_mode, i);
2185 #else
2186 		txr->me = i;
2187 #endif
2188 		txr->num_desc = adapter->num_tx_desc;
2189 
2190 		/* Initialize the TX side lock */
2191 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2192 		    device_get_nameunit(dev), txr->me);
2193 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2194 
2195 		if (ixgbe_dma_malloc(adapter, tsize,
2196 			&txr->txdma, BUS_DMA_NOWAIT)) {
2197 			device_printf(dev,
2198 			    "Unable to allocate TX Descriptor memory\n");
2199 			error = ENOMEM;
2200 			goto err_tx_desc;
2201 		}
2202 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2203 		bzero((void *)txr->tx_base, tsize);
2204 
2205         	/* Now allocate transmit buffers for the ring */
2206         	if (ixgbe_allocate_transmit_buffers(txr)) {
2207 			device_printf(dev,
2208 			    "Critical Failure setting up transmit buffers\n");
2209 			error = ENOMEM;
2210 			goto err_tx_desc;
2211         	}
2212 #ifndef IXGBE_LEGACY_TX
2213 		/* Allocate a buf ring */
2214 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2215 		    M_WAITOK, &txr->tx_mtx);
2216 		if (txr->br == NULL) {
2217 			device_printf(dev,
2218 			    "Critical Failure setting up buf ring\n");
2219 			error = ENOMEM;
2220 			goto err_tx_desc;
2221         	}
2222 #endif
2223 	}
2224 
2225 	/*
2226 	 * Next the RX queues...
2227 	 */
2228 	rsize = roundup2(adapter->num_rx_desc *
2229 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2230 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2231 		rxr = &adapter->rx_rings[i];
2232 		/* Set up some basics */
2233 		rxr->adapter = adapter;
2234 #ifdef PCI_IOV
2235 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
2236 #else
2237 		rxr->me = i;
2238 #endif
2239 		rxr->num_desc = adapter->num_rx_desc;
2240 
2241 		/* Initialize the RX side lock */
2242 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2243 		    device_get_nameunit(dev), rxr->me);
2244 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2245 
2246 		if (ixgbe_dma_malloc(adapter, rsize,
2247 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2248 			device_printf(dev,
2249 			    "Unable to allocate RxDescriptor memory\n");
2250 			error = ENOMEM;
2251 			goto err_rx_desc;
2252 		}
2253 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2254 		bzero((void *)rxr->rx_base, rsize);
2255 
2256         	/* Allocate receive buffers for the ring*/
2257 		if (ixgbe_allocate_receive_buffers(rxr)) {
2258 			device_printf(dev,
2259 			    "Critical Failure setting up receive buffers\n");
2260 			error = ENOMEM;
2261 			goto err_rx_desc;
2262 		}
2263 	}
2264 
2265 	/*
2266 	** Finally set up the queue holding structs
2267 	*/
2268 	for (int i = 0; i < adapter->num_queues; i++) {
2269 		que = &adapter->queues[i];
2270 		que->adapter = adapter;
2271 		que->me = i;
2272 		que->txr = &adapter->tx_rings[i];
2273 		que->rxr = &adapter->rx_rings[i];
2274 	}
2275 
2276 	return (0);
2277 
2278 err_rx_desc:
2279 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2280 		ixgbe_dma_free(adapter, &rxr->rxdma);
2281 err_tx_desc:
2282 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2283 		ixgbe_dma_free(adapter, &txr->txdma);
2284 	free(adapter->rx_rings, M_DEVBUF);
2285 rx_fail:
2286 	free(adapter->tx_rings, M_DEVBUF);
2287 tx_fail:
2288 	free(adapter->queues, M_DEVBUF);
2289 fail:
2290 	return (error);
2291 }
2292