xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision 788ca347b816afd83b2885e0c79aeeb88649b2ab)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  Also this will cause IP forwarding to
62 **  fail, and unlike LRO that can't be
63 **  controlled by the stack. For all these
64 **  reasons I've deemed it best to leave
65 **  this off, with no tunable interface;
66 **  enabling it requires recompiling
67 **  the driver.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** rate at which we sample TX packets
75 ** for the filter pool; with the default
76 ** of 20, every 20th packet is probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83 
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88 	u16 value;
89 
90 	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91 	    reg, 2);
92 
93 	return (value);
94 }
95 
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100 	    reg, value, 2);
101 
102 	return;
103 }
104 
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int	ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112 
113 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
117 		    struct mbuf *, u32 *, u32 *);
118 static int	ixgbe_tso_setup(struct tx_ring *,
119 		    struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 		    struct mbuf *, u32);
126 
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  If resources are not available, the stack is notified
135  *  and the packet is requeued.
136  **********************************************************************/
137 
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141 	struct mbuf    *m_head;
142 	struct adapter *adapter = txr->adapter;
143 
144 	IXGBE_TX_LOCK_ASSERT(txr);
145 
146 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147 		return;
148 	if (!adapter->link_active)
149 		return;
150 
151 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153 			break;
154 
155 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156 		if (m_head == NULL)
157 			break;
158 
159 		if (ixgbe_xmit(txr, &m_head)) {
160 			if (m_head != NULL)
161 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162 			break;
163 		}
164 		/* Send a copy of the frame to the BPF listener */
165 		ETHER_BPF_MTAP(ifp, m_head);
166 	}
167 	return;
168 }
169 
170 /*
171  * Legacy TX start - called by the stack, this
172  * always uses the first tx ring, and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178 	struct adapter *adapter = ifp->if_softc;
179 	struct tx_ring	*txr = adapter->tx_rings;
180 
181 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182 		IXGBE_TX_LOCK(txr);
183 		ixgbe_start_locked(txr, ifp);
184 		IXGBE_TX_UNLOCK(txr);
185 	}
186 	return;
187 }
188 
189 #else /* ! IXGBE_LEGACY_TX */
190 
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198 	struct adapter	*adapter = ifp->if_softc;
199 	struct ix_queue	*que;
200 	struct tx_ring	*txr;
201 	int 		i, err = 0;
202 #ifdef	RSS
203 	uint32_t bucket_id;
204 #endif
205 
206 	/*
207 	 * When doing RSS, map it to the same outbound queue
208 	 * as the incoming flow would be mapped to.
209 	 *
210 	 * If everything is set up correctly, this should be the
211 	 * same bucket the current CPU is mapped to.
212 	 */
213 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
214 #ifdef	RSS
215 		if (rss_hash2bucket(m->m_pkthdr.flowid,
216 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
217 			/* TODO: spit out something if bucket_id > num_queues? */
218 			i = bucket_id % adapter->num_queues;
219 		else
220 #endif
221 			i = m->m_pkthdr.flowid % adapter->num_queues;
222 	} else
223 		i = curcpu % adapter->num_queues;
224 
225 	/* Check for a hung queue and pick alternative (ffsl() is 1-based) */
226 	if (((1 << i) & adapter->active_queues) == 0)
227 		i = ffsl(adapter->active_queues) ? ffsl(adapter->active_queues) - 1 : i;
228 
229 	txr = &adapter->tx_rings[i];
230 	que = &adapter->queues[i];
231 
232 	err = drbr_enqueue(ifp, txr->br, m);
233 	if (err)
234 		return (err);
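	/*
	 * The mbuf is now on the ring's buf_ring; drain it here if we
	 * can take the TX lock, otherwise defer to the queue taskqueue.
	 */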
235 	if (IXGBE_TX_TRYLOCK(txr)) {
236 		ixgbe_mq_start_locked(ifp, txr);
237 		IXGBE_TX_UNLOCK(txr);
238 	} else
239 		taskqueue_enqueue(que->tq, &txr->txq_task);
240 
241 	return (0);
242 }
243 
244 int
245 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
246 {
247 	struct adapter  *adapter = txr->adapter;
248         struct mbuf     *next;
249         int             enqueued = 0, err = 0;
250 
251 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
252 	    adapter->link_active == 0)
253 		return (ENETDOWN);
254 
255 	/* Process the queue */
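	/*
	 * Note: the pre- and post-901504 loop variants below are
	 * interleaved via #if blocks; they share the error handling
	 * and the break statement that follows.
	 */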
256 #if __FreeBSD_version < 901504
257 	next = drbr_dequeue(ifp, txr->br);
258 	while (next != NULL) {
259 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
260 			if (next != NULL)
261 				err = drbr_enqueue(ifp, txr->br, next);
262 #else
263 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
264 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
265 			if (next == NULL) {
266 				drbr_advance(ifp, txr->br);
267 			} else {
268 				drbr_putback(ifp, txr->br, next);
269 			}
270 #endif
271 			break;
272 		}
273 #if __FreeBSD_version >= 901504
274 		drbr_advance(ifp, txr->br);
275 #endif
276 		enqueued++;
277 #if 0 // this is VF-only
278 #if __FreeBSD_version >= 1100036
279 		if (next->m_flags & M_MCAST)
280 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
281 #endif
282 #endif
283 		/* Send a copy of the frame to the BPF listener */
284 		ETHER_BPF_MTAP(ifp, next);
285 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
286 			break;
287 #if __FreeBSD_version < 901504
288 		next = drbr_dequeue(ifp, txr->br);
289 #endif
290 	}
291 
292 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
293 		ixgbe_txeof(txr);
294 
295 	return (err);
296 }
297 
298 /*
299  * Called from a taskqueue to drain queued transmit packets.
300  */
301 void
302 ixgbe_deferred_mq_start(void *arg, int pending)
303 {
304 	struct tx_ring *txr = arg;
305 	struct adapter *adapter = txr->adapter;
306 	struct ifnet *ifp = adapter->ifp;
307 
308 	IXGBE_TX_LOCK(txr);
309 	if (!drbr_empty(ifp, txr->br))
310 		ixgbe_mq_start_locked(ifp, txr);
311 	IXGBE_TX_UNLOCK(txr);
312 }
313 
314 /*
315 ** Flush all ring buffers
316 */
317 void
318 ixgbe_qflush(struct ifnet *ifp)
319 {
320 	struct adapter	*adapter = ifp->if_softc;
321 	struct tx_ring	*txr = adapter->tx_rings;
322 	struct mbuf	*m;
323 
324 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
325 		IXGBE_TX_LOCK(txr);
326 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
327 			m_freem(m);
328 		IXGBE_TX_UNLOCK(txr);
329 	}
330 	if_qflush(ifp);
331 }
332 #endif /* IXGBE_LEGACY_TX */
333 
334 
335 /*********************************************************************
336  *
337  *  This routine maps the mbufs to tx descriptors, allowing the
338  *  TX engine to transmit the packets.
339  *  	- return 0 on success, positive on failure
340  *
341  **********************************************************************/
342 
343 static int
344 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
345 {
346 	struct adapter  *adapter = txr->adapter;
347 	u32		olinfo_status = 0, cmd_type_len;
348 	int             i, j, error, nsegs;
349 	int		first;
350 	bool		remap = TRUE;
351 	struct mbuf	*m_head;
352 	bus_dma_segment_t segs[adapter->num_segs];
353 	bus_dmamap_t	map;
354 	struct ixgbe_tx_buf *txbuf;
355 	union ixgbe_adv_tx_desc *txd = NULL;
356 
357 	m_head = *m_headp;
358 
359 	/* Basic descriptor defines */
360         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
361 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
362 
363 	if (m_head->m_flags & M_VLANTAG)
364         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
365 
366         /*
367          * Important to capture the first descriptor
368          * used, because its tx_buffer will store the EOP
369          * descriptor that the hardware reports back on.
370          */
371         first = txr->next_avail_desc;
372 	txbuf = &txr->tx_buffers[first];
373 	map = txbuf->map;
374 
375 	/*
376 	 * Map the packet for DMA.
377 	 */
378 retry:
379 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
380 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
381 
382 	if (__predict_false(error)) {
383 		struct mbuf *m;
384 
385 		switch (error) {
386 		case EFBIG:
387 			/* Try it again? - one try */
388 			if (remap == TRUE) {
389 				remap = FALSE;
390 				m = m_defrag(*m_headp, M_NOWAIT);
391 				if (m == NULL) {
392 					adapter->mbuf_defrag_failed++;
393 					m_freem(*m_headp);
394 					*m_headp = NULL;
395 					return (ENOBUFS);
396 				}
397 				*m_headp = m;
398 				goto retry;
399 			} else
400 				return (error);
401 		case ENOMEM:
402 			txr->no_tx_dma_setup++;
403 			return (error);
404 		default:
405 			txr->no_tx_dma_setup++;
406 			m_freem(*m_headp);
407 			*m_headp = NULL;
408 			return (error);
409 		}
410 	}
411 
412 	/* Make certain there are enough descriptors */
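	/* (nsegs data descriptors plus the context descriptor, with one spare) */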
413 	if (nsegs > txr->tx_avail - 2) {
414 		txr->no_desc_avail++;
415 		bus_dmamap_unload(txr->txtag, map);
416 		return (ENOBUFS);
417 	}
418 	m_head = *m_headp;
419 
420 	/*
421 	** Set up the appropriate offload context;
422 	** this will consume the first descriptor.
423 	*/
424 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
425 	if (__predict_false(error)) {
426 		if (error == ENOBUFS)
427 			*m_headp = NULL;
428 		return (error);
429 	}
430 
431 #ifdef IXGBE_FDIR
432 	/* Do the flow director magic */
433 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
434 		++txr->atr_count;
435 		if (txr->atr_count >= atr_sample_rate) {
436 			ixgbe_atr(txr, m_head);
437 			txr->atr_count = 0;
438 		}
439 	}
440 #endif
441 
442 	olinfo_status |= IXGBE_ADVTXD_CC;
443 	i = txr->next_avail_desc;
444 	for (j = 0; j < nsegs; j++) {
445 		bus_size_t seglen;
446 		bus_addr_t segaddr;
447 
448 		txbuf = &txr->tx_buffers[i];
449 		txd = &txr->tx_base[i];
450 		seglen = segs[j].ds_len;
451 		segaddr = htole64(segs[j].ds_addr);
452 
453 		txd->read.buffer_addr = segaddr;
454 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
455 		    cmd_type_len | seglen);
456 		txd->read.olinfo_status = htole32(olinfo_status);
457 
458 		if (++i == txr->num_desc)
459 			i = 0;
460 	}
461 
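	/*
	 * The last data descriptor carries EOP (end of packet) and RS
	 * (report status) so that ixgbe_txeof() can detect completion.
	 */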
462 	txd->read.cmd_type_len |=
463 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
464 	txr->tx_avail -= nsegs;
465 	txr->next_avail_desc = i;
466 
467 	txbuf->m_head = m_head;
468 	/*
469 	** Here we swap the map so the last descriptor,
470 	** which gets the completion interrupt, has the
471 	** real (loaded) map, and the first descriptor
472 	** gets the unused map from the last buffer.
473 	*/
474 	txr->tx_buffers[first].map = txbuf->map;
475 	txbuf->map = map;
476 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
477 
478         /* Set the EOP descriptor that will be marked done */
479         txbuf = &txr->tx_buffers[first];
480 	txbuf->eop = txd;
481 
482         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
483             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
484 	/*
485 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
486 	 * hardware that this frame is available to transmit.
487 	 */
488 	++txr->total_packets;
489 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
490 
491 	/* Mark queue as having work */
492 	if (txr->busy == 0)
493 		txr->busy = 1;
494 
495 	return (0);
496 
497 }
498 
499 
500 /*********************************************************************
501  *
502  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
503  *  the information needed to transmit a packet on the wire. This is
504  *  called only once at attach; setup is done on every reset.
505  *
506  **********************************************************************/
507 int
508 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
509 {
510 	struct adapter *adapter = txr->adapter;
511 	device_t dev = adapter->dev;
512 	struct ixgbe_tx_buf *txbuf;
513 	int error, i;
514 
515 	/*
516 	 * Setup DMA descriptor areas.
517 	 */
518 	if ((error = bus_dma_tag_create(
519 			       bus_get_dma_tag(adapter->dev),	/* parent */
520 			       1, 0,		/* alignment, bounds */
521 			       BUS_SPACE_MAXADDR,	/* lowaddr */
522 			       BUS_SPACE_MAXADDR,	/* highaddr */
523 			       NULL, NULL,		/* filter, filterarg */
524 			       IXGBE_TSO_SIZE,		/* maxsize */
525 			       adapter->num_segs,	/* nsegments */
526 			       PAGE_SIZE,		/* maxsegsize */
527 			       0,			/* flags */
528 			       NULL,			/* lockfunc */
529 			       NULL,			/* lockfuncarg */
530 			       &txr->txtag))) {
531 		device_printf(dev, "Unable to allocate TX DMA tag\n");
532 		goto fail;
533 	}
534 
535 	if (!(txr->tx_buffers =
536 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
537 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
538 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
539 		error = ENOMEM;
540 		goto fail;
541 	}
542 
543         /* Create the descriptor buffer dma maps */
544 	txbuf = txr->tx_buffers;
545 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
546 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
547 		if (error != 0) {
548 			device_printf(dev, "Unable to create TX DMA map\n");
549 			goto fail;
550 		}
551 	}
552 
553 	return 0;
554 fail:
555 	/* We free everything; this handles the case where we failed partway */
556 	ixgbe_free_transmit_structures(adapter);
557 	return (error);
558 }
559 
560 /*********************************************************************
561  *
562  *  Initialize a transmit ring.
563  *
564  **********************************************************************/
565 static void
566 ixgbe_setup_transmit_ring(struct tx_ring *txr)
567 {
568 	struct adapter *adapter = txr->adapter;
569 	struct ixgbe_tx_buf *txbuf;
570 	int i;
571 #ifdef DEV_NETMAP
572 	struct netmap_adapter *na = NA(adapter->ifp);
573 	struct netmap_slot *slot;
574 #endif /* DEV_NETMAP */
575 
576 	/* Clear the old ring contents */
577 	IXGBE_TX_LOCK(txr);
578 #ifdef DEV_NETMAP
579 	/*
580 	 * (under lock): if in netmap mode, do some consistency
581 	 * checks and set slot to entry 0 of the netmap ring.
582 	 */
583 	slot = netmap_reset(na, NR_TX, txr->me, 0);
584 #endif /* DEV_NETMAP */
585 	bzero((void *)txr->tx_base,
586 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
587 	/* Reset indices */
588 	txr->next_avail_desc = 0;
589 	txr->next_to_clean = 0;
590 
591 	/* Free any existing tx buffers. */
592         txbuf = txr->tx_buffers;
593 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
594 		if (txbuf->m_head != NULL) {
595 			bus_dmamap_sync(txr->txtag, txbuf->map,
596 			    BUS_DMASYNC_POSTWRITE);
597 			bus_dmamap_unload(txr->txtag, txbuf->map);
598 			m_freem(txbuf->m_head);
599 			txbuf->m_head = NULL;
600 		}
601 #ifdef DEV_NETMAP
602 		/*
603 		 * In netmap mode, set the map for the packet buffer.
604 		 * NOTE: Some drivers (not this one) also need to set
605 		 * the physical buffer address in the NIC ring.
606 		 * Slots in the netmap ring (indexed by "si") are
607 		 * kring->nkr_hwofs positions "ahead" wrt the
608 		 * corresponding slot in the NIC ring. In some drivers
609 		 * (not here) nkr_hwofs can be negative. Function
610 		 * netmap_idx_n2k() handles wraparounds properly.
611 		 */
612 		if (slot) {
613 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
614 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
615 		}
616 #endif /* DEV_NETMAP */
617 		/* Clear the EOP descriptor pointer */
618 		txbuf->eop = NULL;
619         }
620 
621 #ifdef IXGBE_FDIR
622 	/* Set the rate at which we sample packets */
623 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
624 		txr->atr_sample = atr_sample_rate;
625 #endif
626 
627 	/* Set number of descriptors available */
628 	txr->tx_avail = adapter->num_tx_desc;
629 
630 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
631 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
632 	IXGBE_TX_UNLOCK(txr);
633 }
634 
635 /*********************************************************************
636  *
637  *  Initialize all transmit rings.
638  *
639  **********************************************************************/
640 int
641 ixgbe_setup_transmit_structures(struct adapter *adapter)
642 {
643 	struct tx_ring *txr = adapter->tx_rings;
644 
645 	for (int i = 0; i < adapter->num_queues; i++, txr++)
646 		ixgbe_setup_transmit_ring(txr);
647 
648 	return (0);
649 }
650 
651 /*********************************************************************
652  *
653  *  Free all transmit rings.
654  *
655  **********************************************************************/
656 void
657 ixgbe_free_transmit_structures(struct adapter *adapter)
658 {
659 	struct tx_ring *txr = adapter->tx_rings;
660 
661 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
662 		IXGBE_TX_LOCK(txr);
663 		ixgbe_free_transmit_buffers(txr);
664 		ixgbe_dma_free(adapter, &txr->txdma);
665 		IXGBE_TX_UNLOCK(txr);
666 		IXGBE_TX_LOCK_DESTROY(txr);
667 	}
668 	free(adapter->tx_rings, M_DEVBUF);
669 }
670 
671 /*********************************************************************
672  *
673  *  Free transmit ring related data structures.
674  *
675  **********************************************************************/
676 static void
677 ixgbe_free_transmit_buffers(struct tx_ring *txr)
678 {
679 	struct adapter *adapter = txr->adapter;
680 	struct ixgbe_tx_buf *tx_buffer;
681 	int             i;
682 
683 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
684 
685 	if (txr->tx_buffers == NULL)
686 		return;
687 
688 	tx_buffer = txr->tx_buffers;
689 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
690 		if (tx_buffer->m_head != NULL) {
691 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
692 			    BUS_DMASYNC_POSTWRITE);
693 			bus_dmamap_unload(txr->txtag,
694 			    tx_buffer->map);
695 			m_freem(tx_buffer->m_head);
696 			tx_buffer->m_head = NULL;
697 			if (tx_buffer->map != NULL) {
698 				bus_dmamap_destroy(txr->txtag,
699 				    tx_buffer->map);
700 				tx_buffer->map = NULL;
701 			}
702 		} else if (tx_buffer->map != NULL) {
703 			bus_dmamap_unload(txr->txtag,
704 			    tx_buffer->map);
705 			bus_dmamap_destroy(txr->txtag,
706 			    tx_buffer->map);
707 			tx_buffer->map = NULL;
708 		}
709 	}
710 #ifdef IXGBE_LEGACY_TX
711 	if (txr->br != NULL)
712 		buf_ring_free(txr->br, M_DEVBUF);
713 #endif
714 	if (txr->tx_buffers != NULL) {
715 		free(txr->tx_buffers, M_DEVBUF);
716 		txr->tx_buffers = NULL;
717 	}
718 	if (txr->txtag != NULL) {
719 		bus_dma_tag_destroy(txr->txtag);
720 		txr->txtag = NULL;
721 	}
722 	return;
723 }
724 
725 /*********************************************************************
726  *
727  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
728  *
729  **********************************************************************/
730 
731 static int
732 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
733     u32 *cmd_type_len, u32 *olinfo_status)
734 {
735 	struct ixgbe_adv_tx_context_desc *TXD;
736 	struct ether_vlan_header *eh;
737 	struct ip *ip;
738 	struct ip6_hdr *ip6;
739 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
740 	int	ehdrlen, ip_hlen = 0;
741 	u16	etype;
742 	u8	ipproto = 0;
743 	int	offload = TRUE;
744 	int	ctxd = txr->next_avail_desc;
745 	u16	vtag = 0;
746 
747 	/* First check if TSO is to be used */
748 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
749 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
750 
751 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
752 		offload = FALSE;
753 
754 	/* Indicate the whole packet as payload when not doing TSO */
755        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
756 
757 	/* Now ready a context descriptor */
758 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
759 
760 	/*
761 	** In advanced descriptors the vlan tag must
762 	** be placed into the context descriptor. Hence
763 	** we need to make one even if not doing offloads.
764 	*/
765 	if (mp->m_flags & M_VLANTAG) {
766 		vtag = htole16(mp->m_pkthdr.ether_vtag);
767 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
768 	}
769 
770 	/*
771 	 * Determine where frame payload starts.
772 	 * Jump over vlan headers if already present,
773 	 * helpful for QinQ too.
774 	 */
775 	eh = mtod(mp, struct ether_vlan_header *);
776 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
777 		etype = ntohs(eh->evl_proto);
778 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
779 	} else {
780 		etype = ntohs(eh->evl_encap_proto);
781 		ehdrlen = ETHER_HDR_LEN;
782 	}
783 
784 	/* Set the ether header length */
785 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
786 
787 	if (offload == FALSE)
788 		goto no_offloads;
789 
790 	switch (etype) {
791 		case ETHERTYPE_IP:
792 			ip = (struct ip *)(mp->m_data + ehdrlen);
793 			ip_hlen = ip->ip_hl << 2;
794 			ipproto = ip->ip_p;
795 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
796 			break;
797 		case ETHERTYPE_IPV6:
798 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
799 			ip_hlen = sizeof(struct ip6_hdr);
800 			/* XXX-BZ this will go badly in case of ext hdrs. */
801 			ipproto = ip6->ip6_nxt;
802 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
803 			break;
804 		default:
805 			offload = FALSE;
806 			break;
807 	}
808 
809 	vlan_macip_lens |= ip_hlen;
810 
811 	switch (ipproto) {
812 		case IPPROTO_TCP:
813 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
814 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
815 			break;
816 
817 		case IPPROTO_UDP:
818 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
819 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
820 			break;
821 
822 #if __FreeBSD_version >= 800000
823 		case IPPROTO_SCTP:
824 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
825 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
826 			break;
827 #endif
828 		default:
829 			offload = FALSE;
830 			break;
831 	}
832 
833 	if (offload) /* For the TX descriptor setup */
834 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
835 
836 no_offloads:
837 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
838 
839 	/* Now copy bits into descriptor */
840 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
841 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
842 	TXD->seqnum_seed = htole32(0);
843 	TXD->mss_l4len_idx = htole32(0);
844 
845 	/* We've consumed the first desc, adjust counters */
846 	if (++ctxd == txr->num_desc)
847 		ctxd = 0;
848 	txr->next_avail_desc = ctxd;
849 	--txr->tx_avail;
850 
851         return (0);
852 }
853 
854 /**********************************************************************
855  *
856  *  Setup work for hardware segmentation offload (TSO) on
857  *  adapters using advanced tx descriptors
858  *
859  **********************************************************************/
860 static int
861 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
862     u32 *cmd_type_len, u32 *olinfo_status)
863 {
864 	struct ixgbe_adv_tx_context_desc *TXD;
865 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
866 	u32 mss_l4len_idx = 0, paylen;
867 	u16 vtag = 0, eh_type;
868 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
869 	struct ether_vlan_header *eh;
870 #ifdef INET6
871 	struct ip6_hdr *ip6;
872 #endif
873 #ifdef INET
874 	struct ip *ip;
875 #endif
876 	struct tcphdr *th;
877 
878 
879 	/*
880 	 * Determine where frame payload starts.
881 	 * Jump over vlan headers if already present
882 	 */
883 	eh = mtod(mp, struct ether_vlan_header *);
884 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
886 		eh_type = eh->evl_proto;
887 	} else {
888 		ehdrlen = ETHER_HDR_LEN;
889 		eh_type = eh->evl_encap_proto;
890 	}
891 
892 	switch (ntohs(eh_type)) {
893 #ifdef INET6
894 	case ETHERTYPE_IPV6:
895 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
896 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
897 		if (ip6->ip6_nxt != IPPROTO_TCP)
898 			return (ENXIO);
899 		ip_hlen = sizeof(struct ip6_hdr);
900 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
901 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
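		/* Seed th_sum with the pseudo-header checksum; the hardware
		 * computes the full TCP checksum for each TSO segment. */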
902 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
904 		break;
905 #endif
906 #ifdef INET
907 	case ETHERTYPE_IP:
908 		ip = (struct ip *)(mp->m_data + ehdrlen);
909 		if (ip->ip_p != IPPROTO_TCP)
910 			return (ENXIO);
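		/* Zero the IP checksum and seed th_sum with the pseudo-header
		 * sum; the hardware recomputes both for every TSO segment. */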
911 		ip->ip_sum = 0;
912 		ip_hlen = ip->ip_hl << 2;
913 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
914 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
915 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
916 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
917 		/* Tell transmit desc to also do IPv4 checksum. */
918 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
919 		break;
920 #endif
921 	default:
922 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
923 		    __func__, ntohs(eh_type));
924 		break;
925 	}
926 
927 	ctxd = txr->next_avail_desc;
928 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
929 
930 	tcp_hlen = th->th_off << 2;
931 
932 	/* This is used in the transmit desc in encap */
933 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
934 
935 	/* VLAN MACLEN IPLEN */
936 	if (mp->m_flags & M_VLANTAG) {
937 		vtag = htole16(mp->m_pkthdr.ether_vtag);
938                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
939 	}
940 
941 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
942 	vlan_macip_lens |= ip_hlen;
943 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
944 
945 	/* ADV DTYPE TUCMD */
946 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
947 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
948 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
949 
950 	/* MSS L4LEN IDX */
951 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
952 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
953 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
954 
955 	TXD->seqnum_seed = htole32(0);
956 
957 	if (++ctxd == txr->num_desc)
958 		ctxd = 0;
959 
960 	txr->tx_avail--;
961 	txr->next_avail_desc = ctxd;
962 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
963 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
964 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
965 	++txr->tso_tx;
966 	return (0);
967 }
968 
969 
970 /**********************************************************************
971  *
972  *  Examine each tx_buffer in the used queue. If the hardware is done
973  *  processing the packet then free associated resources. The
974  *  tx_buffer is put back on the free queue.
975  *
976  **********************************************************************/
977 void
978 ixgbe_txeof(struct tx_ring *txr)
979 {
980 #ifdef DEV_NETMAP
981 	struct adapter		*adapter = txr->adapter;
982 	struct ifnet		*ifp = adapter->ifp;
983 #endif
984 	u32			work, processed = 0;
985 	u16			limit = txr->process_limit;
986 	struct ixgbe_tx_buf	*buf;
987 	union ixgbe_adv_tx_desc *txd;
988 
989 	mtx_assert(&txr->tx_mtx, MA_OWNED);
990 
991 #ifdef DEV_NETMAP
992 	if (ifp->if_capenable & IFCAP_NETMAP) {
993 		struct netmap_adapter *na = NA(ifp);
994 		struct netmap_kring *kring = &na->tx_rings[txr->me];
995 		txd = txr->tx_base;
996 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 		    BUS_DMASYNC_POSTREAD);
998 		/*
999 		 * In netmap mode, all the work is done in the context
1000 		 * of the client thread. Interrupt handlers only wake up
1001 		 * clients, which may be sleeping on individual rings
1002 		 * or on a global resource for all rings.
1003 		 * To implement tx interrupt mitigation, we wake up the client
1004 		 * thread roughly every half ring, even if the NIC interrupts
1005 		 * more frequently. This is implemented as follows:
1006 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007 		 *   the slot that should wake up the thread (nkr_num_slots
1008 		 *   means the user thread should not be woken up);
1009 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1010 		 *   or the slot has the DD bit set.
1011 		 */
1012 		if (!netmap_mitigate ||
1013 		    (kring->nr_kflags < kring->nkr_num_slots &&
1014 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015 			netmap_tx_irq(ifp, txr->me);
1016 		}
1017 		return;
1018 	}
1019 #endif /* DEV_NETMAP */
1020 
1021 	if (txr->tx_avail == txr->num_desc) {
1022 		txr->busy = 0;
1023 		return;
1024 	}
1025 
1026 	/* Get work starting point */
1027 	work = txr->next_to_clean;
1028 	buf = &txr->tx_buffers[work];
1029 	txd = &txr->tx_base[work];
1030 	work -= txr->num_desc; /* The distance to ring end */
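	/*
	 * 'work' is kept as a negative offset from the end of the ring;
	 * it reaches zero exactly when the index wraps back to slot 0.
	 */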
1031         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032             BUS_DMASYNC_POSTREAD);
1033 
1034 	do {
1035 		union ixgbe_adv_tx_desc *eop = buf->eop;
1036 		if (eop == NULL) /* No work */
1037 			break;
1038 
1039 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040 			break;	/* I/O not complete */
1041 
1042 		if (buf->m_head) {
1043 			txr->bytes +=
1044 			    buf->m_head->m_pkthdr.len;
1045 			bus_dmamap_sync(txr->txtag,
1046 			    buf->map,
1047 			    BUS_DMASYNC_POSTWRITE);
1048 			bus_dmamap_unload(txr->txtag,
1049 			    buf->map);
1050 			m_freem(buf->m_head);
1051 			buf->m_head = NULL;
1052 		}
1053 		buf->eop = NULL;
1054 		++txr->tx_avail;
1055 
1056 		/* We clean the range if multi segment */
1057 		while (txd != eop) {
1058 			++txd;
1059 			++buf;
1060 			++work;
1061 			/* wrap the ring? */
1062 			if (__predict_false(!work)) {
1063 				work -= txr->num_desc;
1064 				buf = txr->tx_buffers;
1065 				txd = txr->tx_base;
1066 			}
1067 			if (buf->m_head) {
1068 				txr->bytes +=
1069 				    buf->m_head->m_pkthdr.len;
1070 				bus_dmamap_sync(txr->txtag,
1071 				    buf->map,
1072 				    BUS_DMASYNC_POSTWRITE);
1073 				bus_dmamap_unload(txr->txtag,
1074 				    buf->map);
1075 				m_freem(buf->m_head);
1076 				buf->m_head = NULL;
1077 			}
1078 			++txr->tx_avail;
1079 			buf->eop = NULL;
1080 
1081 		}
1082 		++txr->packets;
1083 		++processed;
1084 
1085 		/* Try the next packet */
1086 		++txd;
1087 		++buf;
1088 		++work;
1089 		/* reset with a wrap */
1090 		if (__predict_false(!work)) {
1091 			work -= txr->num_desc;
1092 			buf = txr->tx_buffers;
1093 			txd = txr->tx_base;
1094 		}
1095 		prefetch(txd);
1096 	} while (__predict_true(--limit));
1097 
1098 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1099 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1100 
1101 	work += txr->num_desc;
1102 	txr->next_to_clean = work;
1103 
1104 	/*
1105 	** Queue hang detection: we know there is
1106 	** work outstanding or the early return above
1107 	** would have been taken, so increment busy
1108 	** if nothing was cleaned; the local timer
1109 	** then checks this count and marks the queue
1110 	** HUNG if it exceeds the maximum attempts.
1111 	*/
1112 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1113 		++txr->busy;
1114 	/*
1115 	** If anything was cleaned we reset the state to 1;
1116 	** note this will clear HUNG if it was set.
1117 	*/
1118 	if (processed)
1119 		txr->busy = 1;
1120 
1121 	if (txr->tx_avail == txr->num_desc)
1122 		txr->busy = 0;
1123 
1124 	return;
1125 }
1126 
1127 
1128 #ifdef IXGBE_FDIR
1129 /*
1130 ** This routine parses packet headers so that Flow
1131 ** Director can make a hashed filter table entry,
1132 ** allowing traffic flows to be identified and kept
1133 ** on the same CPU.  Doing this for every packet
1134 ** would be a performance hit, so we only do it
1135 ** once every IXGBE_FDIR_RATE packets.
1136 */
1137 static void
1138 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1139 {
1140 	struct adapter			*adapter = txr->adapter;
1141 	struct ix_queue			*que;
1142 	struct ip			*ip;
1143 	struct tcphdr			*th;
1144 	struct udphdr			*uh;
1145 	struct ether_vlan_header	*eh;
1146 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1147 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1148 	int  				ehdrlen, ip_hlen;
1149 	u16				etype;
1150 
1151 	eh = mtod(mp, struct ether_vlan_header *);
1152 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1153 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1154 		etype = eh->evl_proto;
1155 	} else {
1156 		ehdrlen = ETHER_HDR_LEN;
1157 		etype = eh->evl_encap_proto;
1158 	}
1159 
1160 	/* Only handling IPv4 */
1161 	if (etype != htons(ETHERTYPE_IP))
1162 		return;
1163 
1164 	ip = (struct ip *)(mp->m_data + ehdrlen);
1165 	ip_hlen = ip->ip_hl << 2;
1166 
1167 	/* check if we're UDP or TCP */
1168 	switch (ip->ip_p) {
1169 	case IPPROTO_TCP:
1170 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1171 		/* src and dst are inverted */
1172 		common.port.dst ^= th->th_sport;
1173 		common.port.src ^= th->th_dport;
1174 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1175 		break;
1176 	case IPPROTO_UDP:
1177 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1178 		/* src and dst are inverted */
1179 		common.port.dst ^= uh->uh_sport;
1180 		common.port.src ^= uh->uh_dport;
1181 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1182 		break;
1183 	default:
1184 		return;
1185 	}
1186 
1187 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1188 	if (mp->m_pkthdr.ether_vtag)
1189 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1190 	else
1191 		common.flex_bytes ^= etype;
1192 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1193 
1194 	que = &adapter->queues[txr->me];
1195 	/*
1196 	** This assumes the Rx queue and Tx
1197 	** queue are bound to the same CPU
1198 	*/
1199 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1200 	    input, common, que->msix);
1201 }
1202 #endif /* IXGBE_FDIR */
1203 
1204 /*
1205 ** Used to detect a descriptor that has
1206 ** been merged by Hardware RSC.
1207 */
1208 static inline u32
1209 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1210 {
1211 	return (le32toh(rx->wb.lower.lo_dword.data) &
1212 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1213 }
1214 
1215 /*********************************************************************
1216  *
1217  *  Initialize the Hardware RSC (LRO) feature on 82599
1218  *  for an RX ring; this is toggled by the LRO capability
1219  *  even though it is transparent to the stack.
1220  *
1221  *  NOTE: since this HW feature only works with IPv4, and
1222  *        our testing has shown soft LRO to be as effective,
1223  *        I have decided to disable this by default.
1224  *
1225  **********************************************************************/
1226 static void
1227 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1228 {
1229 	struct	adapter 	*adapter = rxr->adapter;
1230 	struct	ixgbe_hw	*hw = &adapter->hw;
1231 	u32			rscctrl, rdrxctl;
1232 
1233 	/* If turning LRO/RSC off we need to disable it */
1234 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1235 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1236 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1237 		return;
1238 	}
1239 
1240 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1241 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1242 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1243 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1244 #endif /* DEV_NETMAP */
1245 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1246 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1247 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1248 
1249 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1250 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1251 	/*
1252 	** Limit the total number of descriptors that
1253 	** can be combined, so it does not exceed 64K
1254 	*/
1255 	if (rxr->mbuf_sz == MCLBYTES)
1256 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1257 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1258 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1259 	else if (rxr->mbuf_sz == MJUM9BYTES)
1260 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1261 	else  /* Using 16K cluster */
1262 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1263 
1264 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1265 
1266 	/* Enable TCP header recognition */
1267 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1268 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1269 	    IXGBE_PSRTYPE_TCPHDR));
1270 
1271 	/* Disable RSC for ACK packets */
1272 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1273 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1274 
1275 	rxr->hw_rsc = TRUE;
1276 }
1277 /*********************************************************************
1278  *
1279  *  Refresh mbuf buffers for RX descriptor rings
1280  *   - now keeps its own state, so discards due to resource
1281  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1282  *     it just returns, keeping its placeholder, and can simply
1283  *     be called again later to retry.
1284  *
1285  **********************************************************************/
1286 static void
1287 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1288 {
1289 	struct adapter		*adapter = rxr->adapter;
1290 	bus_dma_segment_t	seg[1];
1291 	struct ixgbe_rx_buf	*rxbuf;
1292 	struct mbuf		*mp;
1293 	int			i, j, nsegs, error;
1294 	bool			refreshed = FALSE;
1295 
1296 	i = j = rxr->next_to_refresh;
1297 	/* Control the loop with one beyond */
1298 	if (++j == rxr->num_desc)
1299 		j = 0;
1300 
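	/*
	 * 'j' runs one slot ahead of 'i', so the refresh stops one
	 * descriptor short of 'limit' and the tail never catches it.
	 */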
1301 	while (j != limit) {
1302 		rxbuf = &rxr->rx_buffers[i];
1303 		if (rxbuf->buf == NULL) {
1304 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1305 			    M_PKTHDR, rxr->mbuf_sz);
1306 			if (mp == NULL)
1307 				goto update;
1308 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1309 				m_adj(mp, ETHER_ALIGN);
1310 		} else
1311 			mp = rxbuf->buf;
1312 
1313 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1314 
1315 		/* If we're dealing with an mbuf that was copied rather
1316 		 * than replaced, there's no need to go through busdma.
1317 		 */
1318 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1319 			/* Get the memory mapping */
1320 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1321 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1322 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1323 			if (error != 0) {
1324 				printf("Refresh mbufs: payload dmamap load"
1325 				    " failure - %d\n", error);
1326 				m_free(mp);
1327 				rxbuf->buf = NULL;
1328 				goto update;
1329 			}
1330 			rxbuf->buf = mp;
1331 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1332 			    BUS_DMASYNC_PREREAD);
1333 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1334 			    htole64(seg[0].ds_addr);
1335 		} else {
1336 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1337 			rxbuf->flags &= ~IXGBE_RX_COPY;
1338 		}
1339 
1340 		refreshed = TRUE;
1341 		/* Next is precalculated */
1342 		i = j;
1343 		rxr->next_to_refresh = i;
1344 		if (++j == rxr->num_desc)
1345 			j = 0;
1346 	}
1347 update:
1348 	if (refreshed) /* Update hardware tail index */
1349 		IXGBE_WRITE_REG(&adapter->hw,
1350 		    rxr->tail, rxr->next_to_refresh);
1351 	return;
1352 }
1353 
1354 /*********************************************************************
1355  *
1356  *  Allocate memory for rx_buffer structures. Since we use one
1357  *  rx_buffer per received packet, the maximum number of rx_buffers
1358  *  that we'll need is equal to the number of receive descriptors
1359  *  that we've allocated.
1360  *
1361  **********************************************************************/
1362 int
1363 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1364 {
1365 	struct	adapter 	*adapter = rxr->adapter;
1366 	device_t 		dev = adapter->dev;
1367 	struct ixgbe_rx_buf 	*rxbuf;
1368 	int             	i, bsize, error;
1369 
1370 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1371 	if (!(rxr->rx_buffers =
1372 	    (struct ixgbe_rx_buf *) malloc(bsize,
1373 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1374 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1375 		error = ENOMEM;
1376 		goto fail;
1377 	}
1378 
1379 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1380 				   1, 0,	/* alignment, bounds */
1381 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1382 				   BUS_SPACE_MAXADDR,	/* highaddr */
1383 				   NULL, NULL,		/* filter, filterarg */
1384 				   MJUM16BYTES,		/* maxsize */
1385 				   1,			/* nsegments */
1386 				   MJUM16BYTES,		/* maxsegsize */
1387 				   0,			/* flags */
1388 				   NULL,		/* lockfunc */
1389 				   NULL,		/* lockfuncarg */
1390 				   &rxr->ptag))) {
1391 		device_printf(dev, "Unable to create RX DMA tag\n");
1392 		goto fail;
1393 	}
1394 
1395 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1396 		rxbuf = &rxr->rx_buffers[i];
1397 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1398 		if (error) {
1399 			device_printf(dev, "Unable to create RX dma map\n");
1400 			goto fail;
1401 		}
1402 	}
1403 
1404 	return (0);
1405 
1406 fail:
1407 	/* Frees all, but can handle partial completion */
1408 	ixgbe_free_receive_structures(adapter);
1409 	return (error);
1410 }
1411 
1412 
1413 static void
1414 ixgbe_free_receive_ring(struct rx_ring *rxr)
1415 {
1416 	struct ixgbe_rx_buf       *rxbuf;
1417 	int i;
1418 
1419 	for (i = 0; i < rxr->num_desc; i++) {
1420 		rxbuf = &rxr->rx_buffers[i];
1421 		if (rxbuf->buf != NULL) {
1422 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1423 			    BUS_DMASYNC_POSTREAD);
1424 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1425 			rxbuf->buf->m_flags |= M_PKTHDR;
1426 			m_freem(rxbuf->buf);
1427 			rxbuf->buf = NULL;
1428 			rxbuf->flags = 0;
1429 		}
1430 	}
1431 }
1432 
1433 
1434 /*********************************************************************
1435  *
1436  *  Initialize a receive ring and its buffers.
1437  *
1438  **********************************************************************/
1439 static int
1440 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1441 {
1442 	struct	adapter 	*adapter;
1443 	struct ifnet		*ifp;
1444 	device_t		dev;
1445 	struct ixgbe_rx_buf	*rxbuf;
1446 	bus_dma_segment_t	seg[1];
1447 	struct lro_ctrl		*lro = &rxr->lro;
1448 	int			rsize, nsegs, error = 0;
1449 #ifdef DEV_NETMAP
1450 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1451 	struct netmap_slot *slot;
1452 #endif /* DEV_NETMAP */
1453 
1454 	adapter = rxr->adapter;
1455 	ifp = adapter->ifp;
1456 	dev = adapter->dev;
1457 
1458 	/* Clear the ring contents */
1459 	IXGBE_RX_LOCK(rxr);
1460 #ifdef DEV_NETMAP
1461 	/* same as in ixgbe_setup_transmit_ring() */
1462 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1463 #endif /* DEV_NETMAP */
1464 	rsize = roundup2(adapter->num_rx_desc *
1465 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1466 	bzero((void *)rxr->rx_base, rsize);
1467 	/* Cache the size */
1468 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1469 
1470 	/* Free current RX buffer structs and their mbufs */
1471 	ixgbe_free_receive_ring(rxr);
1472 
1473 	/* Now replenish the mbufs */
1474 	for (int j = 0; j != rxr->num_desc; ++j) {
1475 		struct mbuf	*mp;
1476 
1477 		rxbuf = &rxr->rx_buffers[j];
1478 #ifdef DEV_NETMAP
1479 		/*
1480 		 * In netmap mode, fill the map and set the buffer
1481 		 * address in the NIC ring, considering the offset
1482 		 * between the netmap and NIC rings (see comment in
1483 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1484 		 * an mbuf, so end the block with a continue;
1485 		 */
1486 		if (slot) {
1487 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1488 			uint64_t paddr;
1489 			void *addr;
1490 
1491 			addr = PNMB(na, slot + sj, &paddr);
1492 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1493 			/* Update descriptor and the cached value */
1494 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1495 			rxbuf->addr = htole64(paddr);
1496 			continue;
1497 		}
1498 #endif /* DEV_NETMAP */
1499 		rxbuf->flags = 0;
1500 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1501 		    M_PKTHDR, adapter->rx_mbuf_sz);
1502 		if (rxbuf->buf == NULL) {
1503 			error = ENOBUFS;
1504                         goto fail;
1505 		}
1506 		mp = rxbuf->buf;
1507 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1508 		/* Get the memory mapping */
1509 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1510 		    rxbuf->pmap, mp, seg,
1511 		    &nsegs, BUS_DMA_NOWAIT);
1512 		if (error != 0)
1513                         goto fail;
1514 		bus_dmamap_sync(rxr->ptag,
1515 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1516 		/* Update the descriptor and the cached value */
1517 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1518 		rxbuf->addr = htole64(seg[0].ds_addr);
1519 	}
1520 
1521 
1522 	/* Setup our descriptor indices */
1523 	rxr->next_to_check = 0;
1524 	rxr->next_to_refresh = 0;
1525 	rxr->lro_enabled = FALSE;
1526 	rxr->rx_copies = 0;
1527 	rxr->rx_bytes = 0;
1528 	rxr->vtag_strip = FALSE;
1529 
1530 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1531 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1532 
1533 	/*
1534 	** Now set up the LRO interface:
1535 	*/
1536 	if (ixgbe_rsc_enable)
1537 		ixgbe_setup_hw_rsc(rxr);
1538 	else if (ifp->if_capenable & IFCAP_LRO) {
1539 		int err = tcp_lro_init(lro);
1540 		if (err) {
1541 			device_printf(dev, "LRO Initialization failed!\n");
1542 			goto fail;
1543 		}
1544 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1545 		rxr->lro_enabled = TRUE;
1546 		lro->ifp = adapter->ifp;
1547 	}
1548 
1549 	IXGBE_RX_UNLOCK(rxr);
1550 	return (0);
1551 
1552 fail:
1553 	ixgbe_free_receive_ring(rxr);
1554 	IXGBE_RX_UNLOCK(rxr);
1555 	return (error);
1556 }
1557 
1558 /*********************************************************************
1559  *
1560  *  Initialize all receive rings.
1561  *
1562  **********************************************************************/
1563 int
1564 ixgbe_setup_receive_structures(struct adapter *adapter)
1565 {
1566 	struct rx_ring *rxr = adapter->rx_rings;
1567 	int j;
1568 
1569 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1570 		if (ixgbe_setup_receive_ring(rxr))
1571 			goto fail;
1572 
1573 	return (0);
1574 fail:
1575 	/*
1576 	 * Free RX buffers allocated so far; we only need to handle
1577 	 * the rings that completed, since the failing case will have
1578 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1579 	 */
1580 	for (int i = 0; i < j; ++i) {
1581 		rxr = &adapter->rx_rings[i];
1582 		ixgbe_free_receive_ring(rxr);
1583 	}
1584 
1585 	return (ENOBUFS);
1586 }
1587 
1588 
1589 /*********************************************************************
1590  *
1591  *  Free all receive rings.
1592  *
1593  **********************************************************************/
1594 void
1595 ixgbe_free_receive_structures(struct adapter *adapter)
1596 {
1597 	struct rx_ring *rxr = adapter->rx_rings;
1598 
1599 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1600 
1601 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1602 		struct lro_ctrl		*lro = &rxr->lro;
1603 		ixgbe_free_receive_buffers(rxr);
1604 		/* Free LRO memory */
1605 		tcp_lro_free(lro);
1606 		/* Free the ring memory as well */
1607 		ixgbe_dma_free(adapter, &rxr->rxdma);
1608 	}
1609 
1610 	free(adapter->rx_rings, M_DEVBUF);
1611 }
1612 
1613 
1614 /*********************************************************************
1615  *
1616  *  Free receive ring data structures
1617  *
1618  **********************************************************************/
1619 void
1620 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1621 {
1622 	struct adapter		*adapter = rxr->adapter;
1623 	struct ixgbe_rx_buf	*rxbuf;
1624 
1625 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1626 
1627 	/* Cleanup any existing buffers */
1628 	if (rxr->rx_buffers != NULL) {
1629 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1630 			rxbuf = &rxr->rx_buffers[i];
1631 			if (rxbuf->buf != NULL) {
1632 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1633 				    BUS_DMASYNC_POSTREAD);
1634 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1635 				rxbuf->buf->m_flags |= M_PKTHDR;
1636 				m_freem(rxbuf->buf);
1637 			}
1638 			rxbuf->buf = NULL;
1639 			if (rxbuf->pmap != NULL) {
1640 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1641 				rxbuf->pmap = NULL;
1642 			}
1643 		}
1644 		if (rxr->rx_buffers != NULL) {
1645 			free(rxr->rx_buffers, M_DEVBUF);
1646 			rxr->rx_buffers = NULL;
1647 		}
1648 	}
1649 
1650 	if (rxr->ptag != NULL) {
1651 		bus_dma_tag_destroy(rxr->ptag);
1652 		rxr->ptag = NULL;
1653 	}
1654 
1655 	return;
1656 }
1657 
1658 static __inline void
1659 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1660 {
1661 
1662         /*
1663          * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1664          * been computed by hardware, and which carry no VLAN tag in the
1665          * ethernet header.  For IPv6 we do not yet support extension headers.
1666          */
1667         if (rxr->lro_enabled &&
1668             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1669             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1670             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1671             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1672             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1673             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1674             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1675             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1676                 /*
1677                  * Send to the stack if:
1678                  **  - LRO not enabled, or
1679                  **  - no LRO resources, or
1680                  **  - lro enqueue fails
1681                  */
1682                 if (rxr->lro.lro_cnt != 0)
1683                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1684                                 return;
1685         }
1686 	IXGBE_RX_UNLOCK(rxr);
1687         (*ifp->if_input)(ifp, m);
1688 	IXGBE_RX_LOCK(rxr);
1689 }
1690 
1691 static __inline void
1692 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1693 {
1694 	struct ixgbe_rx_buf	*rbuf;
1695 
1696 	rbuf = &rxr->rx_buffers[i];
1697 
1698 
1699 	/*
1700 	** With advanced descriptors the writeback
1701 	** clobbers the buffer addrs, so it's easier
1702 	** to just free the existing mbufs and take
1703 	** the normal refresh path to get new buffers
1704 	** and mapping.
1705 	*/
1706 
1707 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1708 		rbuf->fmp->m_flags |= M_PKTHDR;
1709 		m_freem(rbuf->fmp);
1710 		rbuf->fmp = NULL;
1711 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1712 	} else if (rbuf->buf) {
1713 		m_free(rbuf->buf);
1714 		rbuf->buf = NULL;
1715 	}
1716 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1717 
1718 	rbuf->flags = 0;
1719 
1720 	return;
1721 }
1722 
1723 
1724 /*********************************************************************
1725  *
1726  *  This routine executes in interrupt context. It replenishes
1727  *  the mbufs in the descriptor ring and sends data which has been
1728  *  DMA'd into host memory to the upper layer.
1729  *
1730  *  We loop at most 'count' times (the queue's process_limit), or
1731  *  until there are no more completed descriptors.
1732  *
1733  *  Return TRUE for more work, FALSE for all clean.
1734  *********************************************************************/
1735 bool
1736 ixgbe_rxeof(struct ix_queue *que)
1737 {
1738 	struct adapter		*adapter = que->adapter;
1739 	struct rx_ring		*rxr = que->rxr;
1740 	struct ifnet		*ifp = adapter->ifp;
1741 	struct lro_ctrl		*lro = &rxr->lro;
1742 	struct lro_entry	*queued;
1743 	int			i, nextp, processed = 0;
1744 	u32			staterr = 0;
1745 	u16			count = rxr->process_limit;
1746 	union ixgbe_adv_rx_desc	*cur;
1747 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1748 	u16			pkt_info;
1749 
1750 	IXGBE_RX_LOCK(rxr);
1751 
1752 #ifdef DEV_NETMAP
1753 	/* Same as the txeof routine: wakeup clients on intr. */
1754 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1755 		IXGBE_RX_UNLOCK(rxr);
1756 		return (FALSE);
1757 	}
1758 #endif /* DEV_NETMAP */
1759 
1760 	for (i = rxr->next_to_check; count != 0;) {
1761 		struct mbuf	*sendmp, *mp;
1762 		u32		rsc, ptype;
1763 		u16		len;
1764 		u16		vtag = 0;
1765 		bool		eop;
1766 
1767 		/* Sync the ring. */
1768 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1769 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1770 
1771 		cur = &rxr->rx_base[i];
1772 		staterr = le32toh(cur->wb.upper.status_error);
1773 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1774 
1775 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1776 			break;
1777 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1778 			break;
1779 
1780 		count--;
1781 		sendmp = NULL;
1782 		nbuf = NULL;
1783 		rsc = 0;
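		/* Clear the status so a stale DD bit is not seen on a later pass */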
1784 		cur->wb.upper.status_error = 0;
1785 		rbuf = &rxr->rx_buffers[i];
1786 		mp = rbuf->buf;
1787 
1788 		len = le16toh(cur->wb.upper.length);
1789 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1790 		    IXGBE_RXDADV_PKTTYPE_MASK;
1791 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1792 
1793 		/* Make sure bad packets are discarded */
1794 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1795 #if 0 // VF-only
1796 #if __FreeBSD_version >= 1100036
1797 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1798 #endif
1799 #endif
1800 			rxr->rx_discarded++;
1801 			ixgbe_rx_discard(rxr, i);
1802 			goto next_desc;
1803 		}
1804 
1805 		/*
1806 		** On the 82599, which supports a hardware
1807 		** LRO (called HW RSC), the fragments of a
1808 		** packet need not occupy sequential
1809 		** descriptors; instead, the next descriptor
1810 		** is indicated in bits of this descriptor.
1811 		** This also means that we might process
1812 		** more than one packet at a time, something
1813 		** that has never been true before; it
1814 		** required eliminating global chain pointers
1815 		** in favor of what we are doing here.  -jfv
1816 		*/
1817 		if (!eop) {
1818 			/*
1819 			** Figure out the next descriptor
1820 			** of this frame.
1821 			*/
1822 			if (rxr->hw_rsc == TRUE) {
1823 				rsc = ixgbe_rsc_count(cur);
1824 				rxr->rsc_num += (rsc - 1);
1825 			}
1826 			if (rsc) { /* Get hardware index */
1827 				nextp = ((staterr &
1828 				    IXGBE_RXDADV_NEXTP_MASK) >>
1829 				    IXGBE_RXDADV_NEXTP_SHIFT);
1830 			} else { /* Just sequential */
1831 				nextp = i + 1;
1832 				if (nextp == adapter->num_rx_desc)
1833 					nextp = 0;
1834 			}
1835 			nbuf = &rxr->rx_buffers[nextp];
1836 			prefetch(nbuf);
1837 		}
1838 		/*
1839 		** Rather than using the fmp/lmp global pointers
1840 		** we now keep the head of a packet chain in the
1841 		** buffer struct and pass this along from one
1842 		** descriptor to the next, until we get EOP.
1843 		*/
1844 		mp->m_len = len;
1845 		/*
1846 		** See if a previous descriptor stored a head
1847 		** pointer that tells us what this buffer is
1848 		*/
1849 		sendmp = rbuf->fmp;
1850 		if (sendmp != NULL) {  /* secondary frag */
1851 			rbuf->buf = rbuf->fmp = NULL;
1852 			mp->m_flags &= ~M_PKTHDR;
1853 			sendmp->m_pkthdr.len += mp->m_len;
1854 		} else {
1855 			/*
1856 			 * Optimize.  This might be a small packet,
1857 			 * maybe just a TCP ACK.  Do a fast copy that
1858 			 * is cache aligned into a new mbuf, and
1859 			 * leave the old mbuf+cluster for re-use.
1860 			 */
1861 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1862 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1863 				if (sendmp != NULL) {
1864 					sendmp->m_data +=
1865 					    IXGBE_RX_COPY_ALIGN;
1866 					ixgbe_bcopy(mp->m_data,
1867 					    sendmp->m_data, len);
1868 					sendmp->m_len = len;
1869 					rxr->rx_copies++;
1870 					rbuf->flags |= IXGBE_RX_COPY;
1871 				}
1872 			}
1873 			if (sendmp == NULL) {
1874 				rbuf->buf = rbuf->fmp = NULL;
1875 				sendmp = mp;
1876 			}
1877 
1878 			/* first desc of a non-packet-split chain */
1879 			sendmp->m_flags |= M_PKTHDR;
1880 			sendmp->m_pkthdr.len = mp->m_len;
1881 		}
1882 		++processed;
1883 
1884 		/* Pass the head pointer on */
1885 		if (eop == 0) {
1886 			nbuf->fmp = sendmp;
1887 			sendmp = NULL;
1888 			mp->m_next = nbuf->buf;
1889 		} else { /* Sending this frame */
1890 			sendmp->m_pkthdr.rcvif = ifp;
1891 			rxr->rx_packets++;
1892 			/* capture data for AIM */
1893 			rxr->bytes += sendmp->m_pkthdr.len;
1894 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1895 			/* Process vlan info */
1896 			if ((rxr->vtag_strip) &&
1897 			    (staterr & IXGBE_RXD_STAT_VP))
1898 				vtag = le16toh(cur->wb.upper.vlan);
1899 			if (vtag) {
1900 				sendmp->m_pkthdr.ether_vtag = vtag;
1901 				sendmp->m_flags |= M_VLANTAG;
1902 			}
1903 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1904 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1905 #if __FreeBSD_version >= 800000
1906 #ifdef RSS
1907 			sendmp->m_pkthdr.flowid =
1908 			    le32toh(cur->wb.lower.hi_dword.rss);
1909 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1910 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1911 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1912 				break;
1913 			case IXGBE_RXDADV_RSSTYPE_IPV4:
1914 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1915 				break;
1916 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1917 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1918 				break;
1919 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1920 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1921 				break;
1922 			case IXGBE_RXDADV_RSSTYPE_IPV6:
1923 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1924 				break;
1925 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1926 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1927 				break;
1928 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1929 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1930 				break;
1931 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1932 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1933 				break;
1934 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1935 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1936 				break;
1937 			default:
1938 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1939 			}
1940 #else /* RSS */
1941 			sendmp->m_pkthdr.flowid = que->msix;
1942 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1943 #endif /* RSS */
1944 #endif /* FreeBSD_version */
1945 		}
1946 next_desc:
1947 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1948 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1949 
1950 		/* Advance our pointers to the next descriptor. */
1951 		if (++i == rxr->num_desc)
1952 			i = 0;
1953 
1954 		/* Now send to the stack or do LRO */
1955 		if (sendmp != NULL) {
1956 			rxr->next_to_check = i;
1957 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1958 			i = rxr->next_to_check;
1959 		}
1960 
1961 		/* Every 8 descriptors we go to refresh mbufs */
1962 		if (processed == 8) {
1963 			ixgbe_refresh_mbufs(rxr, i);
1964 			processed = 0;
1965 		}
1966 	}
1967 
1968 	/* Refresh any remaining buf structs */
1969 	if (ixgbe_rx_unrefreshed(rxr))
1970 		ixgbe_refresh_mbufs(rxr, i);
1971 
1972 	rxr->next_to_check = i;
1973 
1974 	/*
1975 	 * Flush any outstanding LRO work
1976 	 */
1977 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1978 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1979 		tcp_lro_flush(lro, queued);
1980 	}
1981 
1982 	IXGBE_RX_UNLOCK(rxr);
1983 
1984 	/*
1985 	** Still have cleaning to do?
1986 	*/
1987 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1988 		return (TRUE);
1989 	else
1990 		return (FALSE);
1991 }
1992 
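/*
 * Hedged usage sketch (not part of this file): the queue interrupt
 * handler in ixgbe.c is expected to consume the TRUE/FALSE contract
 * above by rescheduling itself while receive work remains, roughly:
 *
 *	more = ixgbe_rxeof(que);
 *	if (more)
 *		taskqueue_enqueue(que->tq, &que->que_task);
 *	else
 *		ixgbe_enable_queue(adapter, que->msix);
 *
 * The taskqueue fields and ixgbe_enable_queue() are named here only to
 * illustrate how the return value is meant to be consumed; the
 * authoritative caller lives in ixgbe.c.
 */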
1993 
1994 /*********************************************************************
1995  *
1996  *  Verify that the hardware indicated that the checksum is valid.
1997  *  Inform the stack about the status of checksum so that stack
1998  *  Inform the stack about the status of the checksum so that the
1999  *  stack doesn't spend time verifying the checksum again.
2000  *********************************************************************/
2001 static void
2002 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2003 {
2004 	u16	status = (u16) staterr;
2005 	u8	errors = (u8) (staterr >> 24);
2006 	bool	sctp = FALSE;
2007 
2008 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2009 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2010 		sctp = TRUE;
2011 
2012 	if (status & IXGBE_RXD_STAT_IPCS) {
2013 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2014 			/* IP Checksum Good */
2015 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2016 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2017 
2018 		} else
2019 			mp->m_pkthdr.csum_flags = 0;
2020 	}
2021 	if (status & IXGBE_RXD_STAT_L4CS) {
2022 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2023 #if __FreeBSD_version >= 800000
2024 		if (sctp)
2025 			type = CSUM_SCTP_VALID;
2026 #endif
2027 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2028 			mp->m_pkthdr.csum_flags |= type;
2029 			if (!sctp)
2030 				mp->m_pkthdr.csum_data = htons(0xffff);
2031 		}
2032 	}
2033 	return;
2034 }
2035 
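/*
 * Hedged consumer sketch: a hypothetical upper-layer check for a fully
 * validated L4 checksum, based on the flags set above (the helper name
 * below is illustrative, not part of the stack API):
 *
 *	static __inline bool
 *	rx_l4_csum_ok(struct mbuf *m)
 *	{
 *		return ((m->m_pkthdr.csum_flags &
 *		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 *		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
 *		    m->m_pkthdr.csum_data == 0xffff);
 *	}
 */
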
2036 /********************************************************************
2037  * Manage DMA'able memory.
2038  *******************************************************************/
2039 static void
2040 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2041 {
2042 	if (error)
2043 		return;
2044 	*(bus_addr_t *) arg = segs->ds_addr;
2045 	return;
2046 }
2047 
2048 int
2049 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2050 		struct ixgbe_dma_alloc *dma, int mapflags)
2051 {
2052 	device_t dev = adapter->dev;
2053 	int             r;
2054 
2055 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2056 			       DBA_ALIGN, 0,	/* alignment, bounds */
2057 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2058 			       BUS_SPACE_MAXADDR,	/* highaddr */
2059 			       NULL, NULL,	/* filter, filterarg */
2060 			       size,	/* maxsize */
2061 			       1,	/* nsegments */
2062 			       size,	/* maxsegsize */
2063 			       BUS_DMA_ALLOCNOW,	/* flags */
2064 			       NULL,	/* lockfunc */
2065 			       NULL,	/* lockfuncarg */
2066 			       &dma->dma_tag);
2067 	if (r != 0) {
2068 		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2069 		    "error %d\n", r);
2070 		goto fail_0;
2071 	}
2072 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2073 			     BUS_DMA_NOWAIT, &dma->dma_map);
2074 	if (r != 0) {
2075 		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2076 		    "error %d\n", r);
2077 		goto fail_1;
2078 	}
2079 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2080 			    size,
2081 			    ixgbe_dmamap_cb,
2082 			    &dma->dma_paddr,
2083 			    mapflags | BUS_DMA_NOWAIT);
2084 	if (r != 0) {
2085 		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2086 		    "error %d\n", r);
2087 		goto fail_2;
2088 	}
2089 	dma->dma_size = size;
2090 	return (0);
2091 fail_2:
2092 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2093 fail_1:
2094 	bus_dma_tag_destroy(dma->dma_tag);
2095 fail_0:
2096 	dma->dma_tag = NULL;
2097 	return (r);
2098 }
2099 
2100 void
2101 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2102 {
2103 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2104 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2105 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2106 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2107 	bus_dma_tag_destroy(dma->dma_tag);
2108 }
2109 
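/*
 * Hedged usage sketch of the helper pair above; the real callers are
 * in ixgbe_allocate_queues() below. Variable names are illustrative
 * and error handling is elided:
 *
 *	struct ixgbe_dma_alloc dma;
 *	int tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus addr) ...
 *		ixgbe_dma_free(adapter, &dma);
 *	}
 */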
2110 
2111 /*********************************************************************
2112  *
2113  *  Allocate memory for the transmit and receive rings, and then
2114  *  the descriptors associated with each; this is called only once at attach.
2115  *
2116  **********************************************************************/
2117 int
2118 ixgbe_allocate_queues(struct adapter *adapter)
2119 {
2120 	device_t	dev = adapter->dev;
2121 	struct ix_queue	*que;
2122 	struct tx_ring	*txr;
2123 	struct rx_ring	*rxr;
2124 	int rsize, tsize, error = IXGBE_SUCCESS;
2125 	int txconf = 0, rxconf = 0;
2126 
2127 	/* First allocate the top level queue structs */
2128 	if (!(adapter->queues =
2129 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2130 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2131 		device_printf(dev, "Unable to allocate queue memory\n");
2132 		error = ENOMEM;
2133 		goto fail;
2134 	}
2135 
2136 	/* Next allocate the TX ring struct memory */
2137 	if (!(adapter->tx_rings =
2138 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2139 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2140 		device_printf(dev, "Unable to allocate TX ring memory\n");
2141 		error = ENOMEM;
2142 		goto tx_fail;
2143 	}
2144 
2145 	/* Next allocate the RX */
2146 	if (!(adapter->rx_rings =
2147 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2148 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2149 		device_printf(dev, "Unable to allocate RX ring memory\n");
2150 		error = ENOMEM;
2151 		goto rx_fail;
2152 	}
2153 
2154 	/* For the ring itself */
2155 	tsize = roundup2(adapter->num_tx_desc *
2156 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2157 
2158 	/*
2159 	 * Now set up the TX queues; txconf is needed to handle the
2160 	 * possibility that things fail midcourse and we need to
2161 	 * undo the allocations gracefully.
2162 	 */
2163 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2164 		/* Set up some basics */
2165 		txr = &adapter->tx_rings[i];
2166 		txr->adapter = adapter;
2167 		txr->me = i;
2168 		txr->num_desc = adapter->num_tx_desc;
2169 
2170 		/* Initialize the TX side lock */
2171 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2172 		    device_get_nameunit(dev), txr->me);
2173 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2174 
2175 		if (ixgbe_dma_malloc(adapter, tsize,
2176 			&txr->txdma, BUS_DMA_NOWAIT)) {
2177 			device_printf(dev,
2178 			    "Unable to allocate TX Descriptor memory\n");
2179 			error = ENOMEM;
2180 			goto err_tx_desc;
2181 		}
2182 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2183 		bzero((void *)txr->tx_base, tsize);
2184 
2185 		/* Now allocate transmit buffers for the ring */
2186 		if (ixgbe_allocate_transmit_buffers(txr)) {
2187 			device_printf(dev,
2188 			    "Critical Failure setting up transmit buffers\n");
2189 			error = ENOMEM;
2190 			goto err_tx_desc;
2191 		}
2192 #ifndef IXGBE_LEGACY_TX
2193 		/* Allocate a buf ring */
2194 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2195 		    M_WAITOK, &txr->tx_mtx);
2196 		if (txr->br == NULL) {
2197 			device_printf(dev,
2198 			    "Critical Failure setting up buf ring\n");
2199 			error = ENOMEM;
2200 			goto err_tx_desc;
2201 		}
2202 #endif
2203 	}
2204 
2205 	/*
2206 	 * Next the RX queues...
2207 	 */
2208 	rsize = roundup2(adapter->num_rx_desc *
2209 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2210 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2211 		rxr = &adapter->rx_rings[i];
2212 		/* Set up some basics */
2213 		rxr->adapter = adapter;
2214 		rxr->me = i;
2215 		rxr->num_desc = adapter->num_rx_desc;
2216 
2217 		/* Initialize the RX side lock */
2218 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2219 		    device_get_nameunit(dev), rxr->me);
2220 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2221 
2222 		if (ixgbe_dma_malloc(adapter, rsize,
2223 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2224 			device_printf(dev,
2225 			    "Unable to allocate RX Descriptor memory\n");
2226 			error = ENOMEM;
2227 			goto err_rx_desc;
2228 		}
2229 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2230 		bzero((void *)rxr->rx_base, rsize);
2231 
2232 		/* Allocate receive buffers for the ring */
2233 		if (ixgbe_allocate_receive_buffers(rxr)) {
2234 			device_printf(dev,
2235 			    "Critical Failure setting up receive buffers\n");
2236 			error = ENOMEM;
2237 			goto err_rx_desc;
2238 		}
2239 	}
2240 
2241 	/*
2242 	** Finally set up the queue holding structs
2243 	*/
2244 	for (int i = 0; i < adapter->num_queues; i++) {
2245 		que = &adapter->queues[i];
2246 		que->adapter = adapter;
2247 		que->me = i;
2248 		que->txr = &adapter->tx_rings[i];
2249 		que->rxr = &adapter->rx_rings[i];
2250 	}
2251 
2252 	return (0);
2253 
2254 err_rx_desc:
2255 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2256 		ixgbe_dma_free(adapter, &rxr->rxdma);
2257 err_tx_desc:
2258 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2259 		ixgbe_dma_free(adapter, &txr->txdma);
2260 	free(adapter->rx_rings, M_DEVBUF);
2261 rx_fail:
2262 	free(adapter->tx_rings, M_DEVBUF);
2263 tx_fail:
2264 	free(adapter->queues, M_DEVBUF);
2265 fail:
2266 	return (error);
2267 }
2268
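/*
 * Hedged caller sketch: the attach path in ixgbe.c is expected to call
 * this once and abort device setup on failure, roughly:
 *
 *	if (ixgbe_allocate_queues(adapter)) {
 *		error = ENOMEM;
 *		goto err_out;
 *	}
 *
 * On failure the routine above is written to unwind its own descriptor
 * DMA and ring/queue allocations via the txconf/rxconf counters, so
 * the caller only needs to release what it set up itself.
 */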