xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision 2b15cb3d0922bd70ea592f0da9b4a5b167f4d53f)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also causes IP forwarding to fail,
62 **  and unlike software LRO it cannot be
63 **  disabled by the stack. For all these
64 **  reasons it is best left off by default;
65 **  there is no tunable interface, so enabling
66 **  it requires setting this to TRUE and
67 **  recompiling.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** sampling interval for TX packets
75 ** fed to the filter pool, i.e. every
76 ** 20th packet will be probed.
77 **
78 ** Setting this to 0 disables the
79 ** feature.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83 
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88 	u16 value;
89 
90 	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91 	    reg, 2);
92 
93 	return (value);
94 }
95 
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100 	    reg, value, 2);
101 
102 	return;
103 }
104 
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int	ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112 
113 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
117 		    struct mbuf *, u32 *, u32 *);
118 static int	ixgbe_tso_setup(struct tx_ring *,
119 		    struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 		    struct mbuf *, u32);
126 
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  If resources are not available, the stack is notified
135  *  and the packet is requeued.
136  **********************************************************************/
137 
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141 	struct mbuf    *m_head;
142 	struct adapter *adapter = txr->adapter;
143 
144 	IXGBE_TX_LOCK_ASSERT(txr);
145 
146 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147 		return;
148 	if (!adapter->link_active)
149 		return;
150 
151 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153 			break;
154 
155 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156 		if (m_head == NULL)
157 			break;
158 
159 		if (ixgbe_xmit(txr, &m_head)) {
160 			if (m_head != NULL)
161 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162 			break;
163 		}
164 		/* Send a copy of the frame to the BPF listener */
165 		ETHER_BPF_MTAP(ifp, m_head);
166 	}
167 	return;
168 }
169 
170 /*
171  * Legacy TX start - called by the stack. This
172  * always uses the first tx ring and should not
173  * be used when multiqueue tx is enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178 	struct adapter *adapter = ifp->if_softc;
179 	struct tx_ring	*txr = adapter->tx_rings;
180 
181 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182 		IXGBE_TX_LOCK(txr);
183 		ixgbe_start_locked(txr, ifp);
184 		IXGBE_TX_UNLOCK(txr);
185 	}
186 	return;
187 }
188 
189 #else /* ! IXGBE_LEGACY_TX */
190 
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198 	struct adapter	*adapter = ifp->if_softc;
199 	struct ix_queue	*que;
200 	struct tx_ring	*txr;
201 	int 		i, err = 0;
202 #ifdef	RSS
203 	uint32_t bucket_id;
204 #endif
205 
206 	/*
207 	 * When doing RSS, map the packet to the same outbound queue
208 	 * that the incoming flow would be mapped to.
209 	 *
210 	 * If everything is set up correctly, that should be the
211 	 * same bucket the current CPU belongs to.
212 	 */
213 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
214 #ifdef	RSS
215 		if (rss_hash2bucket(m->m_pkthdr.flowid,
216 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
217 			/* TODO: spit out something if bucket_id > num_queues? */
218 			i = bucket_id % adapter->num_queues;
219 		else
220 #endif
221 			i = m->m_pkthdr.flowid % adapter->num_queues;
222 	} else
223 		i = curcpu % adapter->num_queues;
224 
225 	/* Check for a hung queue and pick alternative */
226 	if (((1 << i) & adapter->active_queues) == 0)
227 		i = ffsl(adapter->active_queues);
228 
229 	txr = &adapter->tx_rings[i];
230 	que = &adapter->queues[i];
231 
232 	err = drbr_enqueue(ifp, txr->br, m);
233 	if (err)
234 		return (err);
235 	if (IXGBE_TX_TRYLOCK(txr)) {
236 		ixgbe_mq_start_locked(ifp, txr);
237 		IXGBE_TX_UNLOCK(txr);
238 	} else
239 		taskqueue_enqueue(que->tq, &txr->txq_task);
240 
241 	return (0);
242 }
243 
244 int
245 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
246 {
247 	struct adapter  *adapter = txr->adapter;
248         struct mbuf     *next;
249         int             enqueued = 0, err = 0;
250 
251 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
252 	    adapter->link_active == 0)
253 		return (ENETDOWN);
254 
255 	/* Process the queue */
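	/*
	 * On FreeBSD >= 901504 drbr_peek()/drbr_advance() let a failed
	 * transmit be left at (or put back to) the head of the buf_ring;
	 * older releases dequeue up front and must re-enqueue on failure.
	 */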
256 #if __FreeBSD_version < 901504
257 	next = drbr_dequeue(ifp, txr->br);
258 	while (next != NULL) {
259 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
260 			if (next != NULL)
261 				err = drbr_enqueue(ifp, txr->br, next);
262 #else
263 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
264 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
265 			if (next == NULL) {
266 				drbr_advance(ifp, txr->br);
267 			} else {
268 				drbr_putback(ifp, txr->br, next);
269 			}
270 #endif
271 			break;
272 		}
273 #if __FreeBSD_version >= 901504
274 		drbr_advance(ifp, txr->br);
275 #endif
276 		enqueued++;
277 #if 0 // this is VF-only
278 #if __FreeBSD_version >= 1100036
279 		if (next->m_flags & M_MCAST)
280 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
281 #endif
282 #endif
283 		/* Send a copy of the frame to the BPF listener */
284 		ETHER_BPF_MTAP(ifp, next);
285 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
286 			break;
287 #if __FreeBSD_version < 901504
288 		next = drbr_dequeue(ifp, txr->br);
289 #endif
290 	}
291 
292 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
293 		ixgbe_txeof(txr);
294 
295 	return (err);
296 }
297 
298 /*
299  * Called from a taskqueue to drain queued transmit packets.
300  */
301 void
302 ixgbe_deferred_mq_start(void *arg, int pending)
303 {
304 	struct tx_ring *txr = arg;
305 	struct adapter *adapter = txr->adapter;
306 	struct ifnet *ifp = adapter->ifp;
307 
308 	IXGBE_TX_LOCK(txr);
309 	if (!drbr_empty(ifp, txr->br))
310 		ixgbe_mq_start_locked(ifp, txr);
311 	IXGBE_TX_UNLOCK(txr);
312 }
313 
314 /*
315 ** Flush all ring buffers
316 */
317 void
318 ixgbe_qflush(struct ifnet *ifp)
319 {
320 	struct adapter	*adapter = ifp->if_softc;
321 	struct tx_ring	*txr = adapter->tx_rings;
322 	struct mbuf	*m;
323 
324 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
325 		IXGBE_TX_LOCK(txr);
326 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
327 			m_freem(m);
328 		IXGBE_TX_UNLOCK(txr);
329 	}
330 	if_qflush(ifp);
331 }
332 #endif /* IXGBE_LEGACY_TX */
333 
334 
335 /*********************************************************************
336  *
337  *  This routine maps the mbufs to tx descriptors, allowing the
338  *  TX engine to transmit the packets.
339  *  	- return 0 on success, positive on failure
340  *
341  **********************************************************************/
342 
343 static int
344 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
345 {
346 	struct adapter  *adapter = txr->adapter;
347 	u32		olinfo_status = 0, cmd_type_len;
348 	int             i, j, error, nsegs;
349 	int		first;
350 	bool		remap = TRUE;
351 	struct mbuf	*m_head;
352 	bus_dma_segment_t segs[adapter->num_segs];
353 	bus_dmamap_t	map;
354 	struct ixgbe_tx_buf *txbuf;
355 	union ixgbe_adv_tx_desc *txd = NULL;
356 
357 	m_head = *m_headp;
358 
359 	/* Basic descriptor defines */
360         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
361 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
362 
363 	if (m_head->m_flags & M_VLANTAG)
364         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
365 
366         /*
367          * Important to capture the first descriptor
368          * used, because its tx_buffer will record the EOP
369          * descriptor that the hardware reports back on.
370          */
371         first = txr->next_avail_desc;
372 	txbuf = &txr->tx_buffers[first];
373 	map = txbuf->map;
374 
375 	/*
376 	 * Map the packet for DMA.
377 	 */
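	/*
	 * If the chain has too many segments (EFBIG), it is defragmented
	 * once below and the DMA load is retried from this point.
	 */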
378 retry:
379 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
380 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
381 
382 	if (__predict_false(error)) {
383 		struct mbuf *m;
384 
385 		switch (error) {
386 		case EFBIG:
387 			/* Try it again? - one try */
388 			if (remap == TRUE) {
389 				remap = FALSE;
390 				m = m_defrag(*m_headp, M_NOWAIT);
391 				if (m == NULL) {
392 					adapter->mbuf_defrag_failed++;
393 					m_freem(*m_headp);
394 					*m_headp = NULL;
395 					return (ENOBUFS);
396 				}
397 				*m_headp = m;
398 				goto retry;
399 			} else
400 				return (error);
401 		case ENOMEM:
402 			txr->no_tx_dma_setup++;
403 			return (error);
404 		default:
405 			txr->no_tx_dma_setup++;
406 			m_freem(*m_headp);
407 			*m_headp = NULL;
408 			return (error);
409 		}
410 	}
411 
412 	/* Make certain there are enough descriptors */
413 	if (nsegs > txr->tx_avail - 2) {
414 		txr->no_desc_avail++;
415 		bus_dmamap_unload(txr->txtag, map);
416 		return (ENOBUFS);
417 	}
418 	m_head = *m_headp;
419 
420 	/*
421 	** Set up the appropriate offload context;
422 	** this will consume the first descriptor.
423 	*/
424 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
425 	if (__predict_false(error)) {
426 		if (error == ENOBUFS)
427 			*m_headp = NULL;
428 		return (error);
429 	}
430 
431 #ifdef IXGBE_FDIR
432 	/* Do the flow director magic */
433 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
434 		++txr->atr_count;
435 		if (txr->atr_count >= atr_sample_rate) {
436 			ixgbe_atr(txr, m_head);
437 			txr->atr_count = 0;
438 		}
439 	}
440 #endif
441 
442 	olinfo_status |= IXGBE_ADVTXD_CC;
443 	i = txr->next_avail_desc;
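	/*
	 * Fill one advanced data descriptor per DMA segment,
	 * wrapping the ring index when it reaches num_desc.
	 */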
444 	for (j = 0; j < nsegs; j++) {
445 		bus_size_t seglen;
446 		bus_addr_t segaddr;
447 
448 		txbuf = &txr->tx_buffers[i];
449 		txd = &txr->tx_base[i];
450 		seglen = segs[j].ds_len;
451 		segaddr = htole64(segs[j].ds_addr);
452 
453 		txd->read.buffer_addr = segaddr;
454 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
455 		    cmd_type_len |seglen);
456 		txd->read.olinfo_status = htole32(olinfo_status);
457 
458 		if (++i == txr->num_desc)
459 			i = 0;
460 	}
461 
462 	txd->read.cmd_type_len |=
463 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
464 	txr->tx_avail -= nsegs;
465 	txr->next_avail_desc = i;
466 
467 	txbuf->m_head = m_head;
468 	/*
469 	** Here we swap the maps so the last descriptor,
470 	** which gets the completion interrupt, has the
471 	** real map, and the first descriptor gets the
472 	** unused map from this descriptor.
473 	*/
474 	txr->tx_buffers[first].map = txbuf->map;
475 	txbuf->map = map;
476 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
477 
478         /* Set the EOP descriptor that will be marked done */
479         txbuf = &txr->tx_buffers[first];
480 	txbuf->eop = txd;
481 
482         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
483             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
484 	/*
485 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
486 	 * hardware that this frame is available to transmit.
487 	 */
488 	++txr->total_packets;
489 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
490 
491 	/* Mark queue as having work */
492 	if (txr->busy == 0)
493 		txr->busy = 1;
494 
495 	return (0);
496 
497 }
498 
499 
500 /*********************************************************************
501  *
502  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
503  *  the information needed to transmit a packet on the wire. This is
504  *  called only once at attach, setup is done every reset.
505  *
506  **********************************************************************/
507 int
508 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
509 {
510 	struct adapter *adapter = txr->adapter;
511 	device_t dev = adapter->dev;
512 	struct ixgbe_tx_buf *txbuf;
513 	int error, i;
514 
515 	/*
516 	 * Setup DMA descriptor areas.
517 	 */
518 	if ((error = bus_dma_tag_create(
519 			       bus_get_dma_tag(adapter->dev),	/* parent */
520 			       1, 0,		/* alignment, bounds */
521 			       BUS_SPACE_MAXADDR,	/* lowaddr */
522 			       BUS_SPACE_MAXADDR,	/* highaddr */
523 			       NULL, NULL,		/* filter, filterarg */
524 			       IXGBE_TSO_SIZE,		/* maxsize */
525 			       adapter->num_segs,	/* nsegments */
526 			       PAGE_SIZE,		/* maxsegsize */
527 			       0,			/* flags */
528 			       NULL,			/* lockfunc */
529 			       NULL,			/* lockfuncarg */
530 			       &txr->txtag))) {
531 		device_printf(dev,"Unable to allocate TX DMA tag\n");
532 		goto fail;
533 	}
534 
535 	if (!(txr->tx_buffers =
536 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
537 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
538 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
539 		error = ENOMEM;
540 		goto fail;
541 	}
542 
543         /* Create the descriptor buffer dma maps */
544 	txbuf = txr->tx_buffers;
545 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
546 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
547 		if (error != 0) {
548 			device_printf(dev, "Unable to create TX DMA map\n");
549 			goto fail;
550 		}
551 	}
552 
553 	return 0;
554 fail:
555 	/* Free everything; this handles the case where we failed partway through */
556 	ixgbe_free_transmit_structures(adapter);
557 	return (error);
558 }
559 
560 /*********************************************************************
561  *
562  *  Initialize a transmit ring.
563  *
564  **********************************************************************/
565 static void
566 ixgbe_setup_transmit_ring(struct tx_ring *txr)
567 {
568 	struct adapter *adapter = txr->adapter;
569 	struct ixgbe_tx_buf *txbuf;
570 	int i;
571 #ifdef DEV_NETMAP
572 	struct netmap_adapter *na = NA(adapter->ifp);
573 	struct netmap_slot *slot;
574 #endif /* DEV_NETMAP */
575 
576 	/* Clear the old ring contents */
577 	IXGBE_TX_LOCK(txr);
578 #ifdef DEV_NETMAP
579 	/*
580 	 * (under lock): if in netmap mode, do some consistency
581 	 * checks and set slot to entry 0 of the netmap ring.
582 	 */
583 	slot = netmap_reset(na, NR_TX, txr->me, 0);
584 #endif /* DEV_NETMAP */
585 	bzero((void *)txr->tx_base,
586 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
587 	/* Reset indices */
588 	txr->next_avail_desc = 0;
589 	txr->next_to_clean = 0;
590 
591 	/* Free any existing tx buffers. */
592         txbuf = txr->tx_buffers;
593 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
594 		if (txbuf->m_head != NULL) {
595 			bus_dmamap_sync(txr->txtag, txbuf->map,
596 			    BUS_DMASYNC_POSTWRITE);
597 			bus_dmamap_unload(txr->txtag, txbuf->map);
598 			m_freem(txbuf->m_head);
599 			txbuf->m_head = NULL;
600 		}
601 #ifdef DEV_NETMAP
602 		/*
603 		 * In netmap mode, set the map for the packet buffer.
604 		 * NOTE: Some drivers (not this one) also need to set
605 		 * the physical buffer address in the NIC ring.
606 		 * Slots in the netmap ring (indexed by "si") are
607 		 * kring->nkr_hwofs positions "ahead" wrt the
608 		 * corresponding slot in the NIC ring. In some drivers
609 		 * (not here) nkr_hwofs can be negative. Function
610 		 * netmap_idx_n2k() handles wraparounds properly.
611 		 */
612 		if (slot) {
613 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
614 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
615 		}
616 #endif /* DEV_NETMAP */
617 		/* Clear the EOP descriptor pointer */
618 		txbuf->eop = NULL;
619         }
620 
621 #ifdef IXGBE_FDIR
622 	/* Set the rate at which we sample packets */
623 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
624 		txr->atr_sample = atr_sample_rate;
625 #endif
626 
627 	/* Set number of descriptors available */
628 	txr->tx_avail = adapter->num_tx_desc;
629 
630 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
631 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
632 	IXGBE_TX_UNLOCK(txr);
633 }
634 
635 /*********************************************************************
636  *
637  *  Initialize all transmit rings.
638  *
639  **********************************************************************/
640 int
641 ixgbe_setup_transmit_structures(struct adapter *adapter)
642 {
643 	struct tx_ring *txr = adapter->tx_rings;
644 
645 	for (int i = 0; i < adapter->num_queues; i++, txr++)
646 		ixgbe_setup_transmit_ring(txr);
647 
648 	return (0);
649 }
650 
651 /*********************************************************************
652  *
653  *  Free all transmit rings.
654  *
655  **********************************************************************/
656 void
657 ixgbe_free_transmit_structures(struct adapter *adapter)
658 {
659 	struct tx_ring *txr = adapter->tx_rings;
660 
661 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
662 		IXGBE_TX_LOCK(txr);
663 		ixgbe_free_transmit_buffers(txr);
664 		ixgbe_dma_free(adapter, &txr->txdma);
665 		IXGBE_TX_UNLOCK(txr);
666 		IXGBE_TX_LOCK_DESTROY(txr);
667 	}
668 	free(adapter->tx_rings, M_DEVBUF);
669 }
670 
671 /*********************************************************************
672  *
673  *  Free transmit ring related data structures.
674  *
675  **********************************************************************/
676 static void
677 ixgbe_free_transmit_buffers(struct tx_ring *txr)
678 {
679 	struct adapter *adapter = txr->adapter;
680 	struct ixgbe_tx_buf *tx_buffer;
681 	int             i;
682 
683 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
684 
685 	if (txr->tx_buffers == NULL)
686 		return;
687 
688 	tx_buffer = txr->tx_buffers;
689 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
690 		if (tx_buffer->m_head != NULL) {
691 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
692 			    BUS_DMASYNC_POSTWRITE);
693 			bus_dmamap_unload(txr->txtag,
694 			    tx_buffer->map);
695 			m_freem(tx_buffer->m_head);
696 			tx_buffer->m_head = NULL;
697 			if (tx_buffer->map != NULL) {
698 				bus_dmamap_destroy(txr->txtag,
699 				    tx_buffer->map);
700 				tx_buffer->map = NULL;
701 			}
702 		} else if (tx_buffer->map != NULL) {
703 			bus_dmamap_unload(txr->txtag,
704 			    tx_buffer->map);
705 			bus_dmamap_destroy(txr->txtag,
706 			    tx_buffer->map);
707 			tx_buffer->map = NULL;
708 		}
709 	}
710 #ifdef IXGBE_LEGACY_TX
711 	if (txr->br != NULL)
712 		buf_ring_free(txr->br, M_DEVBUF);
713 #endif
714 	if (txr->tx_buffers != NULL) {
715 		free(txr->tx_buffers, M_DEVBUF);
716 		txr->tx_buffers = NULL;
717 	}
718 	if (txr->txtag != NULL) {
719 		bus_dma_tag_destroy(txr->txtag);
720 		txr->txtag = NULL;
721 	}
722 	return;
723 }
724 
725 /*********************************************************************
726  *
727  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
728  *
729  **********************************************************************/
730 
731 static int
732 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
733     u32 *cmd_type_len, u32 *olinfo_status)
734 {
735 	struct ixgbe_adv_tx_context_desc *TXD;
736 	struct ether_vlan_header *eh;
737 	struct ip *ip;
738 	struct ip6_hdr *ip6;
739 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
740 	int	ehdrlen, ip_hlen = 0;
741 	u16	etype;
742 	u8	ipproto = 0;
743 	int	offload = TRUE;
744 	int	ctxd = txr->next_avail_desc;
745 	u16	vtag = 0;
746 
747 	/* First check if TSO is to be used */
748 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
749 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
750 
751 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
752 		offload = FALSE;
753 
754 	/* Indicate the whole packet as payload when not doing TSO */
755        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
756 
757 	/* Now ready a context descriptor */
758 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
759 
760 	/*
761 	** In advanced descriptors the vlan tag must
762 	** be placed into the context descriptor. Hence
763 	** we need to make one even if not doing offloads.
764 	*/
765 	if (mp->m_flags & M_VLANTAG) {
766 		vtag = htole16(mp->m_pkthdr.ether_vtag);
767 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
768 	}
769 
770 	/*
771 	 * Determine where frame payload starts.
772 	 * Jump over vlan headers if already present,
773 	 * helpful for QinQ too.
774 	 */
775 	eh = mtod(mp, struct ether_vlan_header *);
776 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
777 		etype = ntohs(eh->evl_proto);
778 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
779 	} else {
780 		etype = ntohs(eh->evl_encap_proto);
781 		ehdrlen = ETHER_HDR_LEN;
782 	}
783 
784 	/* Set the ether header length */
785 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
786 
787 	if (offload == FALSE)
788 		goto no_offloads;
789 
790 	switch (etype) {
791 		case ETHERTYPE_IP:
792 			ip = (struct ip *)(mp->m_data + ehdrlen);
793 			ip_hlen = ip->ip_hl << 2;
794 			ipproto = ip->ip_p;
795 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
796 			break;
797 		case ETHERTYPE_IPV6:
798 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
799 			ip_hlen = sizeof(struct ip6_hdr);
800 			/* XXX-BZ this will go badly in case of ext hdrs. */
801 			ipproto = ip6->ip6_nxt;
802 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
803 			break;
804 		default:
805 			offload = FALSE;
806 			break;
807 	}
808 
809 	vlan_macip_lens |= ip_hlen;
810 
811 	switch (ipproto) {
812 		case IPPROTO_TCP:
813 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
814 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
815 			break;
816 
817 		case IPPROTO_UDP:
818 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
819 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
820 			break;
821 
822 #if __FreeBSD_version >= 800000
823 		case IPPROTO_SCTP:
824 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
825 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
826 			break;
827 #endif
828 		default:
829 			offload = FALSE;
830 			break;
831 	}
832 
833 	if (offload) /* For the TX descriptor setup */
834 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
835 
836 no_offloads:
837 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
838 
839 	/* Now copy bits into descriptor */
840 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
841 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
842 	TXD->seqnum_seed = htole32(0);
843 	TXD->mss_l4len_idx = htole32(0);
844 
845 	/* We've consumed the first desc, adjust counters */
846 	if (++ctxd == txr->num_desc)
847 		ctxd = 0;
848 	txr->next_avail_desc = ctxd;
849 	--txr->tx_avail;
850 
851         return (0);
852 }
853 
854 /**********************************************************************
855  *
856  *  Setup work for hardware segmentation offload (TSO) on
857  *  adapters using advanced tx descriptors
858  *
859  **********************************************************************/
860 static int
861 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
862     u32 *cmd_type_len, u32 *olinfo_status)
863 {
864 	struct ixgbe_adv_tx_context_desc *TXD;
865 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
866 	u32 mss_l4len_idx = 0, paylen;
867 	u16 vtag = 0, eh_type;
868 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
869 	struct ether_vlan_header *eh;
870 #ifdef INET6
871 	struct ip6_hdr *ip6;
872 #endif
873 #ifdef INET
874 	struct ip *ip;
875 #endif
876 	struct tcphdr *th;
877 
878 
879 	/*
880 	 * Determine where frame payload starts.
881 	 * Jump over vlan headers if already present
882 	 */
883 	eh = mtod(mp, struct ether_vlan_header *);
884 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
886 		eh_type = eh->evl_proto;
887 	} else {
888 		ehdrlen = ETHER_HDR_LEN;
889 		eh_type = eh->evl_encap_proto;
890 	}
891 
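	/*
	 * For TSO the hardware expects the TCP checksum field to be
	 * seeded with the pseudo-header checksum (computed with a zero
	 * length); each case below does that and records the L3 type.
	 */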
892 	switch (ntohs(eh_type)) {
893 #ifdef INET6
894 	case ETHERTYPE_IPV6:
895 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
896 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
897 		if (ip6->ip6_nxt != IPPROTO_TCP)
898 			return (ENXIO);
899 		ip_hlen = sizeof(struct ip6_hdr);
900 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
901 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
902 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
904 		break;
905 #endif
906 #ifdef INET
907 	case ETHERTYPE_IP:
908 		ip = (struct ip *)(mp->m_data + ehdrlen);
909 		if (ip->ip_p != IPPROTO_TCP)
910 			return (ENXIO);
911 		ip->ip_sum = 0;
912 		ip_hlen = ip->ip_hl << 2;
913 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
914 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
915 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
916 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
917 		/* Tell transmit desc to also do IPv4 checksum. */
918 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
919 		break;
920 #endif
921 	default:
922 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
923 		    __func__, ntohs(eh_type));
924 		break;
925 	}
926 
927 	ctxd = txr->next_avail_desc;
928 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
929 
930 	tcp_hlen = th->th_off << 2;
931 
932 	/* This is used in the transmit desc in encap */
933 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
934 
935 	/* VLAN MACLEN IPLEN */
936 	if (mp->m_flags & M_VLANTAG) {
937 		vtag = htole16(mp->m_pkthdr.ether_vtag);
938                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
939 	}
940 
941 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
942 	vlan_macip_lens |= ip_hlen;
943 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
944 
945 	/* ADV DTYPE TUCMD */
946 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
947 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
948 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
949 
950 	/* MSS L4LEN IDX */
951 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
952 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
953 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
954 
955 	TXD->seqnum_seed = htole32(0);
956 
957 	if (++ctxd == txr->num_desc)
958 		ctxd = 0;
959 
960 	txr->tx_avail--;
961 	txr->next_avail_desc = ctxd;
962 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
963 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
964 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
965 	++txr->tso_tx;
966 	return (0);
967 }
968 
969 
970 /**********************************************************************
971  *
972  *  Examine each tx_buffer in the used queue. If the hardware is done
973  *  processing the packet then free associated resources. The
974  *  tx_buffer is put back on the free queue.
975  *
976  **********************************************************************/
977 void
978 ixgbe_txeof(struct tx_ring *txr)
979 {
980 #ifdef DEV_NETMAP
981 	struct adapter		*adapter = txr->adapter;
982 	struct ifnet		*ifp = adapter->ifp;
983 #endif
984 	u32			work, processed = 0;
985 	u16			limit = txr->process_limit;
986 	struct ixgbe_tx_buf	*buf;
987 	union ixgbe_adv_tx_desc *txd;
988 
989 	mtx_assert(&txr->tx_mtx, MA_OWNED);
990 
991 #ifdef DEV_NETMAP
992 	if (ifp->if_capenable & IFCAP_NETMAP) {
993 		struct netmap_adapter *na = NA(ifp);
994 		struct netmap_kring *kring = &na->tx_rings[txr->me];
995 		txd = txr->tx_base;
996 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 		    BUS_DMASYNC_POSTREAD);
998 		/*
999 		 * In netmap mode, all the work is done in the context
1000 		 * of the client thread. Interrupt handlers only wake up
1001 		 * clients, which may be sleeping on individual rings
1002 		 * or on a global resource for all rings.
1003 		 * To implement tx interrupt mitigation, we wake up the client
1004 		 * thread roughly every half ring, even if the NIC interrupts
1005 		 * more frequently. This is implemented as follows:
1006 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007 		 *   the slot that should wake up the thread (nkr_num_slots
1008 		 *   means the user thread should not be woken up);
1009 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1010 		 *   or the slot has the DD bit set.
1011 		 */
1012 		if (!netmap_mitigate ||
1013 		    (kring->nr_kflags < kring->nkr_num_slots &&
1014 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015 			netmap_tx_irq(ifp, txr->me);
1016 		}
1017 		return;
1018 	}
1019 #endif /* DEV_NETMAP */
1020 
1021 	if (txr->tx_avail == txr->num_desc) {
1022 		txr->busy = 0;
1023 		return;
1024 	}
1025 
1026 	/* Get work starting point */
1027 	work = txr->next_to_clean;
1028 	buf = &txr->tx_buffers[work];
1029 	txd = &txr->tx_base[work];
1030 	work -= txr->num_desc; /* The distance to ring end */
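	/*
	 * 'work' is now a negative offset from the end of the ring; it
	 * reaches zero exactly when the index wraps, which is what the
	 * "!work" tests below rely on.
	 */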
1031         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032             BUS_DMASYNC_POSTREAD);
1033 
1034 	do {
1035 		union ixgbe_adv_tx_desc *eop= buf->eop;
1036 		if (eop == NULL) /* No work */
1037 			break;
1038 
1039 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040 			break;	/* I/O not complete */
1041 
1042 		if (buf->m_head) {
1043 			txr->bytes +=
1044 			    buf->m_head->m_pkthdr.len;
1045 			bus_dmamap_sync(txr->txtag,
1046 			    buf->map,
1047 			    BUS_DMASYNC_POSTWRITE);
1048 			bus_dmamap_unload(txr->txtag,
1049 			    buf->map);
1050 			m_freem(buf->m_head);
1051 			buf->m_head = NULL;
1052 			buf->map = NULL;
1053 		}
1054 		buf->eop = NULL;
1055 		++txr->tx_avail;
1056 
1057 		/* We clean the range if multi segment */
1058 		while (txd != eop) {
1059 			++txd;
1060 			++buf;
1061 			++work;
1062 			/* wrap the ring? */
1063 			if (__predict_false(!work)) {
1064 				work -= txr->num_desc;
1065 				buf = txr->tx_buffers;
1066 				txd = txr->tx_base;
1067 			}
1068 			if (buf->m_head) {
1069 				txr->bytes +=
1070 				    buf->m_head->m_pkthdr.len;
1071 				bus_dmamap_sync(txr->txtag,
1072 				    buf->map,
1073 				    BUS_DMASYNC_POSTWRITE);
1074 				bus_dmamap_unload(txr->txtag,
1075 				    buf->map);
1076 				m_freem(buf->m_head);
1077 				buf->m_head = NULL;
1078 				buf->map = NULL;
1079 			}
1080 			++txr->tx_avail;
1081 			buf->eop = NULL;
1082 
1083 		}
1084 		++txr->packets;
1085 		++processed;
1086 
1087 		/* Try the next packet */
1088 		++txd;
1089 		++buf;
1090 		++work;
1091 		/* reset with a wrap */
1092 		if (__predict_false(!work)) {
1093 			work -= txr->num_desc;
1094 			buf = txr->tx_buffers;
1095 			txd = txr->tx_base;
1096 		}
1097 		prefetch(txd);
1098 	} while (__predict_true(--limit));
1099 
1100 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1101 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1102 
1103 	work += txr->num_desc;
1104 	txr->next_to_clean = work;
1105 
1106 	/*
1107 	** Queue hang detection: we know there is work
1108 	** outstanding, or the early return above would
1109 	** have been taken, so increment busy if nothing
1110 	** managed to get cleaned. The local timer will
1111 	** then check the count and mark the queue HUNG
1112 	** if it exceeds the maximum number of attempts.
1113 	*/
1114 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1115 		++txr->busy;
1116 	/*
1117 	** If anything gets cleaned we reset the state to 1;
1118 	** note this will clear HUNG if it is set.
1119 	*/
1120 	if (processed)
1121 		txr->busy = 1;
1122 
1123 	if (txr->tx_avail == txr->num_desc)
1124 		txr->busy = 0;
1125 
1126 	return;
1127 }
1128 
1129 
1130 #ifdef IXGBE_FDIR
1131 /*
1132 ** This routine parses packet headers so that Flow
1133 ** Director can make a hashed filter table entry,
1134 ** allowing traffic flows to be identified and kept
1135 ** on the same cpu.  Doing this for every packet would
1136 ** be a performance hit, so we only sample one in every
1137 ** IXGBE_FDIR_RATE packets.
1138 */
1139 static void
1140 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1141 {
1142 	struct adapter			*adapter = txr->adapter;
1143 	struct ix_queue			*que;
1144 	struct ip			*ip;
1145 	struct tcphdr			*th;
1146 	struct udphdr			*uh;
1147 	struct ether_vlan_header	*eh;
1148 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1149 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1150 	int  				ehdrlen, ip_hlen;
1151 	u16				etype;
1152 
1153 	eh = mtod(mp, struct ether_vlan_header *);
1154 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1155 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1156 		etype = eh->evl_proto;
1157 	} else {
1158 		ehdrlen = ETHER_HDR_LEN;
1159 		etype = eh->evl_encap_proto;
1160 	}
1161 
1162 	/* Only handling IPv4 */
1163 	if (etype != htons(ETHERTYPE_IP))
1164 		return;
1165 
1166 	ip = (struct ip *)(mp->m_data + ehdrlen);
1167 	ip_hlen = ip->ip_hl << 2;
1168 
1169 	/* check if we're UDP or TCP */
1170 	switch (ip->ip_p) {
1171 	case IPPROTO_TCP:
1172 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1173 		/* src and dst are inverted */
1174 		common.port.dst ^= th->th_sport;
1175 		common.port.src ^= th->th_dport;
1176 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1177 		break;
1178 	case IPPROTO_UDP:
1179 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1180 		/* src and dst are inverted */
1181 		common.port.dst ^= uh->uh_sport;
1182 		common.port.src ^= uh->uh_dport;
1183 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1184 		break;
1185 	default:
1186 		return;
1187 	}
1188 
1189 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1190 	if (mp->m_pkthdr.ether_vtag)
1191 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1192 	else
1193 		common.flex_bytes ^= etype;
1194 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1195 
1196 	que = &adapter->queues[txr->me];
1197 	/*
1198 	** This assumes the Rx queue and Tx
1199 	** queue are bound to the same CPU
1200 	*/
1201 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1202 	    input, common, que->msix);
1203 }
1204 #endif /* IXGBE_FDIR */
1205 
1206 /*
1207 ** Used to detect a descriptor that has
1208 ** been merged by Hardware RSC.
1209 */
1210 static inline u32
1211 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1212 {
1213 	return (le32toh(rx->wb.lower.lo_dword.data) &
1214 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1215 }
1216 
1217 /*********************************************************************
1218  *
1219  *  Initialize Hardware RSC (LRO) feature on 82599
1220  *  Initialize the Hardware RSC (LRO) feature on 82599
1221  *  for an RX ring; it is toggled by the LRO capability
1222  *  even though it is transparent to the stack.
1223  *
1224  *  NOTE: since this HW feature only works with IPv4, and
1225  *        our testing has shown soft LRO to be as effective,
1226  *        it is disabled by default.
1227  **********************************************************************/
1228 static void
1229 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1230 {
1231 	struct	adapter 	*adapter = rxr->adapter;
1232 	struct	ixgbe_hw	*hw = &adapter->hw;
1233 	u32			rscctrl, rdrxctl;
1234 
1235 	/* If turning LRO/RSC off we need to disable it */
1236 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1237 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1238 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back so RSC is actually disabled */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1239 		return;
1240 	}
1241 
1242 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1243 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1244 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1245 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1246 #endif /* DEV_NETMAP */
1247 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1248 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1249 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1250 
1251 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1252 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1253 	/*
1254 	** Limit the total number of descriptors that
1255 	** can be combined, so it does not exceed 64K
1256 	*/
1257 	if (rxr->mbuf_sz == MCLBYTES)
1258 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1259 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1260 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1261 	else if (rxr->mbuf_sz == MJUM9BYTES)
1262 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1263 	else  /* Using 16K cluster */
1264 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1265 
1266 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1267 
1268 	/* Enable TCP header recognition */
1269 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1270 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1271 	    IXGBE_PSRTYPE_TCPHDR));
1272 
1273 	/* Disable RSC for ACK packets */
1274 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1275 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1276 
1277 	rxr->hw_rsc = TRUE;
1278 }
1279 /*********************************************************************
1280  *
1281  *  Refresh mbuf buffers for RX descriptor rings
1282  *   - now keeps its own state so discards due to resource
1283  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1284  *     it just returns, keeping its placeholder, so it can simply
1285  *     be called again later to retry.
1286  *
1287  **********************************************************************/
1288 static void
1289 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1290 {
1291 	struct adapter		*adapter = rxr->adapter;
1292 	bus_dma_segment_t	seg[1];
1293 	struct ixgbe_rx_buf	*rxbuf;
1294 	struct mbuf		*mp;
1295 	int			i, j, nsegs, error;
1296 	bool			refreshed = FALSE;
1297 
1298 	i = j = rxr->next_to_refresh;
1299 	/* Control the loop with one beyond */
1300 	if (++j == rxr->num_desc)
1301 		j = 0;
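	/*
	 * 'i' is the slot being refreshed and 'j' runs one ahead of it;
	 * refreshing stops once 'j' catches up to 'limit'.
	 */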
1302 
1303 	while (j != limit) {
1304 		rxbuf = &rxr->rx_buffers[i];
1305 		if (rxbuf->buf == NULL) {
1306 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1307 			    M_PKTHDR, rxr->mbuf_sz);
1308 			if (mp == NULL)
1309 				goto update;
1310 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1311 				m_adj(mp, ETHER_ALIGN);
1312 		} else
1313 			mp = rxbuf->buf;
1314 
1315 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1316 
1317 		/* If we're dealing with an mbuf that was copied rather
1318 		 * than replaced, there's no need to go through busdma.
1319 		 */
1320 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1321 			/* Get the memory mapping */
1322 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1323 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1324 			if (error != 0) {
1325 				printf("Refresh mbufs: payload dmamap load"
1326 				    " failure - %d\n", error);
1327 				m_free(mp);
1328 				rxbuf->buf = NULL;
1329 				goto update;
1330 			}
1331 			rxbuf->buf = mp;
1332 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1333 			    BUS_DMASYNC_PREREAD);
1334 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1335 			    htole64(seg[0].ds_addr);
1336 		} else {
1337 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1338 			rxbuf->flags &= ~IXGBE_RX_COPY;
1339 		}
1340 
1341 		refreshed = TRUE;
1342 		/* Next is precalculated */
1343 		i = j;
1344 		rxr->next_to_refresh = i;
1345 		if (++j == rxr->num_desc)
1346 			j = 0;
1347 	}
1348 update:
1349 	if (refreshed) /* Update hardware tail index */
1350 		IXGBE_WRITE_REG(&adapter->hw,
1351 		    rxr->tail, rxr->next_to_refresh);
1352 	return;
1353 }
1354 
1355 /*********************************************************************
1356  *
1357  *  Allocate memory for rx_buffer structures. Since we use one
1358  *  rx_buffer per received packet, the maximum number of rx_buffers
1359  *  that we'll need is equal to the number of receive descriptors
1360  *  that we've allocated.
1361  *
1362  **********************************************************************/
1363 int
1364 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1365 {
1366 	struct	adapter 	*adapter = rxr->adapter;
1367 	device_t 		dev = adapter->dev;
1368 	struct ixgbe_rx_buf 	*rxbuf;
1369 	int             	i, bsize, error;
1370 
1371 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1372 	if (!(rxr->rx_buffers =
1373 	    (struct ixgbe_rx_buf *) malloc(bsize,
1374 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1375 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1376 		error = ENOMEM;
1377 		goto fail;
1378 	}
1379 
1380 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1381 				   1, 0,	/* alignment, bounds */
1382 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1383 				   BUS_SPACE_MAXADDR,	/* highaddr */
1384 				   NULL, NULL,		/* filter, filterarg */
1385 				   MJUM16BYTES,		/* maxsize */
1386 				   1,			/* nsegments */
1387 				   MJUM16BYTES,		/* maxsegsize */
1388 				   0,			/* flags */
1389 				   NULL,		/* lockfunc */
1390 				   NULL,		/* lockfuncarg */
1391 				   &rxr->ptag))) {
1392 		device_printf(dev, "Unable to create RX DMA tag\n");
1393 		goto fail;
1394 	}
1395 
1396 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1397 		rxbuf = &rxr->rx_buffers[i];
1398 		error = bus_dmamap_create(rxr->ptag,
1399 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
1400 		if (error) {
1401 			device_printf(dev, "Unable to create RX dma map\n");
1402 			goto fail;
1403 		}
1404 	}
1405 
1406 	return (0);
1407 
1408 fail:
1409 	/* Frees all, but can handle partial completion */
1410 	ixgbe_free_receive_structures(adapter);
1411 	return (error);
1412 }
1413 
1414 
1415 static void
1416 ixgbe_free_receive_ring(struct rx_ring *rxr)
1417 {
1418 	struct ixgbe_rx_buf       *rxbuf;
1419 	int i;
1420 
1421 	for (i = 0; i < rxr->num_desc; i++) {
1422 		rxbuf = &rxr->rx_buffers[i];
1423 		if (rxbuf->buf != NULL) {
1424 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1425 			    BUS_DMASYNC_POSTREAD);
1426 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1427 			rxbuf->buf->m_flags |= M_PKTHDR;
1428 			m_freem(rxbuf->buf);
1429 			rxbuf->buf = NULL;
1430 			rxbuf->flags = 0;
1431 		}
1432 	}
1433 }
1434 
1435 
1436 /*********************************************************************
1437  *
1438  *  Initialize a receive ring and its buffers.
1439  *
1440  **********************************************************************/
1441 static int
1442 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1443 {
1444 	struct	adapter 	*adapter;
1445 	struct ifnet		*ifp;
1446 	device_t		dev;
1447 	struct ixgbe_rx_buf	*rxbuf;
1448 	bus_dma_segment_t	seg[1];
1449 	struct lro_ctrl		*lro = &rxr->lro;
1450 	int			rsize, nsegs, error = 0;
1451 #ifdef DEV_NETMAP
1452 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1453 	struct netmap_slot *slot;
1454 #endif /* DEV_NETMAP */
1455 
1456 	adapter = rxr->adapter;
1457 	ifp = adapter->ifp;
1458 	dev = adapter->dev;
1459 
1460 	/* Clear the ring contents */
1461 	IXGBE_RX_LOCK(rxr);
1462 #ifdef DEV_NETMAP
1463 	/* same as in ixgbe_setup_transmit_ring() */
1464 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1465 #endif /* DEV_NETMAP */
1466 	rsize = roundup2(adapter->num_rx_desc *
1467 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1468 	bzero((void *)rxr->rx_base, rsize);
1469 	/* Cache the size */
1470 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1471 
1472 	/* Free current RX buffer structs and their mbufs */
1473 	ixgbe_free_receive_ring(rxr);
1474 
1475 	/* Now replenish the mbufs */
1476 	for (int j = 0; j != rxr->num_desc; ++j) {
1477 		struct mbuf	*mp;
1478 
1479 		rxbuf = &rxr->rx_buffers[j];
1480 #ifdef DEV_NETMAP
1481 		/*
1482 		 * In netmap mode, fill the map and set the buffer
1483 		 * address in the NIC ring, considering the offset
1484 		 * between the netmap and NIC rings (see comment in
1485 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1486 		 * an mbuf, so end the block with a continue;
1487 		 */
1488 		if (slot) {
1489 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1490 			uint64_t paddr;
1491 			void *addr;
1492 
1493 			addr = PNMB(na, slot + sj, &paddr);
1494 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1495 			/* Update descriptor and the cached value */
1496 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1497 			rxbuf->addr = htole64(paddr);
1498 			continue;
1499 		}
1500 #endif /* DEV_NETMAP */
1501 		rxbuf->flags = 0;
1502 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1503 		    M_PKTHDR, adapter->rx_mbuf_sz);
1504 		if (rxbuf->buf == NULL) {
1505 			error = ENOBUFS;
1506                         goto fail;
1507 		}
1508 		mp = rxbuf->buf;
1509 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1510 		/* Get the memory mapping */
1511 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1512 		    rxbuf->pmap, mp, seg,
1513 		    &nsegs, BUS_DMA_NOWAIT);
1514 		if (error != 0)
1515                         goto fail;
1516 		bus_dmamap_sync(rxr->ptag,
1517 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1518 		/* Update the descriptor and the cached value */
1519 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1520 		rxbuf->addr = htole64(seg[0].ds_addr);
1521 	}
1522 
1523 
1524 	/* Setup our descriptor indices */
1525 	rxr->next_to_check = 0;
1526 	rxr->next_to_refresh = 0;
1527 	rxr->lro_enabled = FALSE;
1528 	rxr->rx_copies = 0;
1529 	rxr->rx_bytes = 0;
1530 	rxr->vtag_strip = FALSE;
1531 
1532 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1533 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1534 
1535 	/*
1536 	** Now set up the LRO interface:
1537 	*/
1538 	if (ixgbe_rsc_enable)
1539 		ixgbe_setup_hw_rsc(rxr);
1540 	else if (ifp->if_capenable & IFCAP_LRO) {
1541 		int err = tcp_lro_init(lro);
1542 		if (err) {
1543 			device_printf(dev, "LRO Initialization failed!\n");
1544 			goto fail;
1545 		}
1546 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1547 		rxr->lro_enabled = TRUE;
1548 		lro->ifp = adapter->ifp;
1549 	}
1550 
1551 	IXGBE_RX_UNLOCK(rxr);
1552 	return (0);
1553 
1554 fail:
1555 	ixgbe_free_receive_ring(rxr);
1556 	IXGBE_RX_UNLOCK(rxr);
1557 	return (error);
1558 }
1559 
1560 /*********************************************************************
1561  *
1562  *  Initialize all receive rings.
1563  *
1564  **********************************************************************/
1565 int
1566 ixgbe_setup_receive_structures(struct adapter *adapter)
1567 {
1568 	struct rx_ring *rxr = adapter->rx_rings;
1569 	int j;
1570 
1571 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1572 		if (ixgbe_setup_receive_ring(rxr))
1573 			goto fail;
1574 
1575 	return (0);
1576 fail:
1577 	/*
1578 	 * Free the RX buffers allocated so far; we only handle
1579 	 * the rings that completed, since the failing case will
1580 	 * have cleaned up after itself. Ring 'j' failed, so it is the terminus.
1581 	 */
1582 	for (int i = 0; i < j; ++i) {
1583 		rxr = &adapter->rx_rings[i];
1584 		ixgbe_free_receive_ring(rxr);
1585 	}
1586 
1587 	return (ENOBUFS);
1588 }
1589 
1590 
1591 /*********************************************************************
1592  *
1593  *  Free all receive rings.
1594  *
1595  **********************************************************************/
1596 void
1597 ixgbe_free_receive_structures(struct adapter *adapter)
1598 {
1599 	struct rx_ring *rxr = adapter->rx_rings;
1600 
1601 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1602 
1603 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1604 		struct lro_ctrl		*lro = &rxr->lro;
1605 		ixgbe_free_receive_buffers(rxr);
1606 		/* Free LRO memory */
1607 		tcp_lro_free(lro);
1608 		/* Free the ring memory as well */
1609 		ixgbe_dma_free(adapter, &rxr->rxdma);
1610 	}
1611 
1612 	free(adapter->rx_rings, M_DEVBUF);
1613 }
1614 
1615 
1616 /*********************************************************************
1617  *
1618  *  Free receive ring data structures
1619  *
1620  **********************************************************************/
1621 void
1622 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1623 {
1624 	struct adapter		*adapter = rxr->adapter;
1625 	struct ixgbe_rx_buf	*rxbuf;
1626 
1627 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1628 
1629 	/* Cleanup any existing buffers */
1630 	if (rxr->rx_buffers != NULL) {
1631 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1632 			rxbuf = &rxr->rx_buffers[i];
1633 			if (rxbuf->buf != NULL) {
1634 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1635 				    BUS_DMASYNC_POSTREAD);
1636 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1637 				rxbuf->buf->m_flags |= M_PKTHDR;
1638 				m_freem(rxbuf->buf);
1639 			}
1640 			rxbuf->buf = NULL;
1641 			if (rxbuf->pmap != NULL) {
1642 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1643 				rxbuf->pmap = NULL;
1644 			}
1645 		}
1646 		if (rxr->rx_buffers != NULL) {
1647 			free(rxr->rx_buffers, M_DEVBUF);
1648 			rxr->rx_buffers = NULL;
1649 		}
1650 	}
1651 
1652 	if (rxr->ptag != NULL) {
1653 		bus_dma_tag_destroy(rxr->ptag);
1654 		rxr->ptag = NULL;
1655 	}
1656 
1657 	return;
1658 }
1659 
1660 static __inline void
1661 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1662 {
1663 
1664         /*
1665          * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1666          * been verified by hardware, and which carry no VLAN tag in the
1667          * ethernet header.  For IPv6 we do not yet support extension headers.
1668          */
1669         if (rxr->lro_enabled &&
1670             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1671             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1672             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1673             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1674             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1675             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1676             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1677             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1678                 /*
1679                  * Send to the stack if:
1680                  **  - LRO not enabled, or
1681                  **  - no LRO resources, or
1682                  **  - lro enqueue fails
1683                  */
1684                 if (rxr->lro.lro_cnt != 0)
1685                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1686                                 return;
1687         }
1688 	IXGBE_RX_UNLOCK(rxr);
1689         (*ifp->if_input)(ifp, m);
1690 	IXGBE_RX_LOCK(rxr);
1691 }
1692 
1693 static __inline void
1694 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1695 {
1696 	struct ixgbe_rx_buf	*rbuf;
1697 
1698 	rbuf = &rxr->rx_buffers[i];
1699 
1700 
1701 	/*
1702 	** With advanced descriptors the writeback
1703 	** clobbers the buffer addrs, so it is easier
1704 	** to just free the existing mbufs and take
1705 	** the normal refresh path to get new buffers
1706 	** and mapping.
1707 	*/
1708 
1709 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1710 		rbuf->fmp->m_flags |= M_PKTHDR;
1711 		m_freem(rbuf->fmp);
1712 		rbuf->fmp = NULL;
1713 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1714 	} else if (rbuf->buf) {
1715 		m_free(rbuf->buf);
1716 		rbuf->buf = NULL;
1717 	}
1718 
1719 	rbuf->flags = 0;
1720 
1721 	return;
1722 }
1723 
1724 
1725 /*********************************************************************
1726  *
1727  *  This routine executes in interrupt context. It replenishes
1728  *  the mbufs in the descriptor ring and sends data which has been
1729  *  dma'ed into host memory to the upper layer.
1730  *
1731  *  We loop at most count times if count is > 0, or until done if
1732  *  count < 0.
1733  *
1734  *  Return TRUE for more work, FALSE for all clean.
1735  *********************************************************************/
1736 bool
1737 ixgbe_rxeof(struct ix_queue *que)
1738 {
1739 	struct adapter		*adapter = que->adapter;
1740 	struct rx_ring		*rxr = que->rxr;
1741 	struct ifnet		*ifp = adapter->ifp;
1742 	struct lro_ctrl		*lro = &rxr->lro;
1743 	struct lro_entry	*queued;
1744 	int			i, nextp, processed = 0;
1745 	u32			staterr = 0;
1746 	u16			count = rxr->process_limit;
1747 	union ixgbe_adv_rx_desc	*cur;
1748 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1749 	u16			pkt_info;
1750 
1751 	IXGBE_RX_LOCK(rxr);
1752 
1753 #ifdef DEV_NETMAP
1754 	/* Same as the txeof routine: wakeup clients on intr. */
1755 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1756 		IXGBE_RX_UNLOCK(rxr);
1757 		return (FALSE);
1758 	}
1759 #endif /* DEV_NETMAP */
1760 
1761 	for (i = rxr->next_to_check; count != 0;) {
1762 		struct mbuf	*sendmp, *mp;
1763 		u32		rsc, ptype;
1764 		u16		len;
1765 		u16		vtag = 0;
1766 		bool		eop;
1767 
1768 		/* Sync the ring. */
1769 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1770 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1771 
1772 		cur = &rxr->rx_base[i];
1773 		staterr = le32toh(cur->wb.upper.status_error);
1774 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1775 
1776 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1777 			break;
1778 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1779 			break;
1780 
1781 		count--;
1782 		sendmp = NULL;
1783 		nbuf = NULL;
1784 		rsc = 0;
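		/* Clear the status word so a stale DD bit is not re-read later */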
1785 		cur->wb.upper.status_error = 0;
1786 		rbuf = &rxr->rx_buffers[i];
1787 		mp = rbuf->buf;
1788 
1789 		len = le16toh(cur->wb.upper.length);
1790 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1791 		    IXGBE_RXDADV_PKTTYPE_MASK;
1792 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1793 
1794 		/* Make sure bad packets are discarded */
1795 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1796 #if 0 // VF-only
1797 #if __FreeBSD_version >= 1100036
1798 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1799 #endif
1800 #endif
1801 			rxr->rx_discarded++;
1802 			ixgbe_rx_discard(rxr, i);
1803 			goto next_desc;
1804 		}
1805 
1806 		/*
1807 		** On 82599, which supports a hardware
1808 		** LRO (called HW RSC), packets need
1809 		** not be fragmented across sequential
1810 		** descriptors; rather, the next descriptor
1811 		** is indicated in bits of this descriptor.
1812 		** This also means that we might process
1813 		** more than one packet at a time, something
1814 		** that was never true before; it required
1815 		** eliminating global chain pointers in
1816 		** favor of what we are doing here.  -jfv
1817 		*/
1818 		if (!eop) {
1819 			/*
1820 			** Figure out the next descriptor
1821 			** of this frame.
1822 			*/
1823 			if (rxr->hw_rsc == TRUE) {
1824 				rsc = ixgbe_rsc_count(cur);
1825 				rxr->rsc_num += (rsc - 1);
1826 			}
1827 			if (rsc) { /* Get hardware index */
1828 				nextp = ((staterr &
1829 				    IXGBE_RXDADV_NEXTP_MASK) >>
1830 				    IXGBE_RXDADV_NEXTP_SHIFT);
1831 			} else { /* Just sequential */
1832 				nextp = i + 1;
1833 				if (nextp == adapter->num_rx_desc)
1834 					nextp = 0;
1835 			}
1836 			nbuf = &rxr->rx_buffers[nextp];
1837 			prefetch(nbuf);
1838 		}
1839 		/*
1840 		** Rather than using the fmp/lmp global pointers
1841 		** we now keep the head of a packet chain in the
1842 		** buffer struct and pass this along from one
1843 		** descriptor to the next, until we get EOP.
1844 		*/
1845 		mp->m_len = len;
1846 		/*
1847 		** See if there is a stored chain head
1848 		** from an earlier descriptor of this packet.
1849 		*/
1850 		sendmp = rbuf->fmp;
1851 		if (sendmp != NULL) {  /* secondary frag */
1852 			rbuf->buf = rbuf->fmp = NULL;
1853 			mp->m_flags &= ~M_PKTHDR;
1854 			sendmp->m_pkthdr.len += mp->m_len;
1855 		} else {
1856 			/*
1857 			 * Optimize.  This might be a small packet,
1858 			 * maybe just a TCP ACK.  Do a fast copy that
1859 			 * is cache aligned into a new mbuf, and
1860 			 * leave the old mbuf+cluster for re-use.
1861 			 */
1862 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1863 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1864 				if (sendmp != NULL) {
1865 					sendmp->m_data +=
1866 					    IXGBE_RX_COPY_ALIGN;
1867 					ixgbe_bcopy(mp->m_data,
1868 					    sendmp->m_data, len);
1869 					sendmp->m_len = len;
1870 					rxr->rx_copies++;
1871 					rbuf->flags |= IXGBE_RX_COPY;
1872 				}
1873 			}
1874 			if (sendmp == NULL) {
1875 				rbuf->buf = rbuf->fmp = NULL;
1876 				sendmp = mp;
1877 			}
1878 
1879 			/* First desc of a non-packet-split chain */
1880 			sendmp->m_flags |= M_PKTHDR;
1881 			sendmp->m_pkthdr.len = mp->m_len;
1882 		}
1883 		++processed;
1884 
1885 		/* Pass the head pointer on */
1886 		if (eop == 0) {
1887 			nbuf->fmp = sendmp;
1888 			sendmp = NULL;
1889 			mp->m_next = nbuf->buf;
1890 		} else { /* Sending this frame */
1891 			sendmp->m_pkthdr.rcvif = ifp;
1892 			rxr->rx_packets++;
1893 			/* capture data for AIM */
1894 			rxr->bytes += sendmp->m_pkthdr.len;
1895 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1896 			/* Process vlan info */
1897 			if ((rxr->vtag_strip) &&
1898 			    (staterr & IXGBE_RXD_STAT_VP))
1899 				vtag = le16toh(cur->wb.upper.vlan);
1900 			if (vtag) {
1901 				sendmp->m_pkthdr.ether_vtag = vtag;
1902 				sendmp->m_flags |= M_VLANTAG;
1903 			}
1904 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1905 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1906 #if __FreeBSD_version >= 800000
1907 #ifdef RSS
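			/*
			** Hand the hardware-computed RSS hash to the
			** stack and translate the descriptor's RSS type
			** field into the corresponding mbuf hash type.
			*/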
1908 			sendmp->m_pkthdr.flowid =
1909 			    le32toh(cur->wb.lower.hi_dword.rss);
1910 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1911 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1912 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1913 				break;
1914 			case IXGBE_RXDADV_RSSTYPE_IPV4:
1915 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1916 				break;
1917 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1918 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1919 				break;
1920 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1921 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1922 				break;
1923 			case IXGBE_RXDADV_RSSTYPE_IPV6:
1924 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1925 				break;
1926 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1927 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1928 				break;
1929 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1930 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1931 				break;
1932 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1933 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1934 				break;
1935 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1936 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1937 				break;
1938 			default:
1939 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1940 			}
1941 #else /* RSS */
1942 			sendmp->m_pkthdr.flowid = que->msix;
1943 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1944 #endif /* RSS */
1945 #endif /* FreeBSD_version */
1946 		}
1947 next_desc:
1948 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1949 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1950 
1951 		/* Advance our pointers to the next descriptor. */
1952 		if (++i == rxr->num_desc)
1953 			i = 0;
1954 
1955 		/* Now send to the stack or do LRO */
1956 		if (sendmp != NULL) {
1957 			rxr->next_to_check = i;
1958 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1959 			i = rxr->next_to_check;
1960 		}
1961 
1962 		/* Every 8 descriptors, go refresh the mbufs */
1963 		if (processed == 8) {
1964 			ixgbe_refresh_mbufs(rxr, i);
1965 			processed = 0;
1966 		}
1967 	}
1968 
1969 	/* Refresh any remaining buf structs */
1970 	if (ixgbe_rx_unrefreshed(rxr))
1971 		ixgbe_refresh_mbufs(rxr, i);
1972 
1973 	rxr->next_to_check = i;
1974 
1975 	/*
1976 	 * Flush any outstanding LRO work
1977 	 */
1978 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1979 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1980 		tcp_lro_flush(lro, queued);
1981 	}
1982 
1983 	IXGBE_RX_UNLOCK(rxr);
1984 
1985 	/*
1986 	** Still have cleaning to do?
1987 	*/
1988 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1989 		return (TRUE);
1990 	else
1991 		return (FALSE);
1992 }
1993 
1994 
1995 /*********************************************************************
1996  *
1997  *  Verify that the hardware indicated that the checksum is valid.
1998  *  Inform the stack about the status of the checksum so that the
1999  *  stack doesn't spend time verifying it again.
2000  *
2001  *********************************************************************/
2002 static void
2003 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2004 {
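	/*
	** The status bits live in the low 16 bits of staterr and the
	** error bits in bits 24-31; pull them apart before testing the
	** IP and L4 checksum indications.
	*/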
2005 	u16	status = (u16) staterr;
2006 	u8	errors = (u8) (staterr >> 24);
2007 	bool	sctp = FALSE;
2008 
2009 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2010 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2011 		sctp = TRUE;
2012 
2013 	if (status & IXGBE_RXD_STAT_IPCS) {
2014 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2015 			/* IP Checksum Good */
2016 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2017 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2018 
2019 		} else
2020 			mp->m_pkthdr.csum_flags = 0;
2021 	}
2022 	if (status & IXGBE_RXD_STAT_L4CS) {
2023 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2024 #if __FreeBSD_version >= 800000
2025 		if (sctp)
2026 			type = CSUM_SCTP_VALID;
2027 #endif
2028 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2029 			mp->m_pkthdr.csum_flags |= type;
2030 			if (!sctp)
2031 				mp->m_pkthdr.csum_data = htons(0xffff);
2032 		}
2033 	}
2034 	return;
2035 }
2036 
2037 /********************************************************************
2038  * Manage DMA'able memory.
2039  *******************************************************************/
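/*
** bus_dmamap_load() callback: the descriptor rings are allocated as a
** single contiguous segment, so just record its bus address for the
** caller (ixgbe_dma_malloc() stashes it in dma->dma_paddr).
*/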
2040 static void
2041 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2042 {
2043 	if (error)
2044 		return;
2045 	*(bus_addr_t *) arg = segs->ds_addr;
2046 	return;
2047 }
2048 
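/*
** Allocate a DMA-safe region for a descriptor ring: create a tag sized
** and aligned for the ring, allocate the memory, and load the map to
** obtain the bus address the hardware will be programmed with.
** Typical use, as in ixgbe_allocate_queues() below:
**
**	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
**		goto err_tx_desc;
**	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
**
** On failure the partially created state is torn down and the bus_dma
** error code is returned.
*/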
2049 int
2050 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2051 		struct ixgbe_dma_alloc *dma, int mapflags)
2052 {
2053 	device_t dev = adapter->dev;
2054 	int             r;
2055 
2056 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2057 			       DBA_ALIGN, 0,	/* alignment, bounds */
2058 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2059 			       BUS_SPACE_MAXADDR,	/* highaddr */
2060 			       NULL, NULL,	/* filter, filterarg */
2061 			       size,	/* maxsize */
2062 			       1,	/* nsegments */
2063 			       size,	/* maxsegsize */
2064 			       BUS_DMA_ALLOCNOW,	/* flags */
2065 			       NULL,	/* lockfunc */
2066 			       NULL,	/* lockfuncarg */
2067 			       &dma->dma_tag);
2068 	if (r != 0) {
2069 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2070 		       "error %u\n", r);
2071 		goto fail_0;
2072 	}
2073 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2074 			     BUS_DMA_NOWAIT, &dma->dma_map);
2075 	if (r != 0) {
2076 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2077 		       "error %u\n", r);
2078 		goto fail_1;
2079 	}
2080 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2081 			    size,
2082 			    ixgbe_dmamap_cb,
2083 			    &dma->dma_paddr,
2084 			    mapflags | BUS_DMA_NOWAIT);
2085 	if (r != 0) {
2086 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2087 		       "error %u\n", r);
2088 		goto fail_2;
2089 	}
2090 	dma->dma_size = size;
2091 	return (0);
2092 fail_2:
2093 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2094 fail_1:
2095 	bus_dma_tag_destroy(dma->dma_tag);
2096 fail_0:
2097 	dma->dma_tag = NULL;
2098 	return (r);
2099 }
2100 
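/*
** Undo ixgbe_dma_malloc(): sync and unload the map, free the memory,
** and destroy the tag.
*/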
2101 void
2102 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2103 {
2104 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2105 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2106 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2107 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2108 	bus_dma_tag_destroy(dma->dma_tag);
2109 }
2110 
2111 
2112 /*********************************************************************
2113  *
2114  *  Allocate memory for the transmit and receive rings, and then
2115  *  the descriptors associated with each, called only once at attach.
2116  *
2117  **********************************************************************/
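/*
** Order of operations: the ix_queue, tx_ring and rx_ring arrays are
** allocated first, then each ring gets its descriptor DMA area and
** software buffer structures, and finally each queue struct is
** pointed at its TX and RX ring.
*/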
2118 int
2119 ixgbe_allocate_queues(struct adapter *adapter)
2120 {
2121 	device_t	dev = adapter->dev;
2122 	struct ix_queue	*que;
2123 	struct tx_ring	*txr;
2124 	struct rx_ring	*rxr;
2125 	int rsize, tsize, error = IXGBE_SUCCESS;
2126 	int txconf = 0, rxconf = 0;
2127 
2128 	/* First allocate the top level queue structs */
2129 	if (!(adapter->queues =
2130 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2131 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2132 		device_printf(dev, "Unable to allocate queue memory\n");
2133 		error = ENOMEM;
2134 		goto fail;
2135 	}
2136 
2137 	/* Then allocate the TX ring struct memory */
2138 	if (!(adapter->tx_rings =
2139 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2140 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2141 		device_printf(dev, "Unable to allocate TX ring memory\n");
2142 		error = ENOMEM;
2143 		goto tx_fail;
2144 	}
2145 
2146 	/* Next allocate the RX */
2147 	if (!(adapter->rx_rings =
2148 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2149 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2150 		device_printf(dev, "Unable to allocate RX ring memory\n");
2151 		error = ENOMEM;
2152 		goto rx_fail;
2153 	}
2154 
2155 	/* For the ring itself */
2156 	tsize = roundup2(adapter->num_tx_desc *
2157 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2158 
2159 	/*
2160 	 * Now set up the TX queues, txconf is needed to handle the
2161 	 * possibility that things fail midcourse and we need to
2162 	 * undo memory gracefully
2163 	 */
2164 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2165 		/* Set up some basics */
2166 		txr = &adapter->tx_rings[i];
2167 		txr->adapter = adapter;
2168 		txr->me = i;
2169 		txr->num_desc = adapter->num_tx_desc;
2170 
2171 		/* Initialize the TX side lock */
2172 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2173 		    device_get_nameunit(dev), txr->me);
2174 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2175 
2176 		if (ixgbe_dma_malloc(adapter, tsize,
2177 			&txr->txdma, BUS_DMA_NOWAIT)) {
2178 			device_printf(dev,
2179 			    "Unable to allocate TX Descriptor memory\n");
2180 			error = ENOMEM;
2181 			goto err_tx_desc;
2182 		}
2183 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2184 		bzero((void *)txr->tx_base, tsize);
2185 
2186 		/* Now allocate transmit buffers for the ring */
2187 		if (ixgbe_allocate_transmit_buffers(txr)) {
2188 			device_printf(dev,
2189 			    "Critical Failure setting up transmit buffers\n");
2190 			error = ENOMEM;
2191 			goto err_tx_desc;
2192 		}
2193 #ifndef IXGBE_LEGACY_TX
2194 		/* Allocate a buf ring */
2195 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2196 		    M_WAITOK, &txr->tx_mtx);
2197 		if (txr->br == NULL) {
2198 			device_printf(dev,
2199 			    "Critical Failure setting up buf ring\n");
2200 			error = ENOMEM;
2201 			goto err_tx_desc;
2202 		}
2203 #endif
2204 	}
2205 
2206 	/*
2207 	 * Next the RX queues...
2208 	 */
2209 	rsize = roundup2(adapter->num_rx_desc *
2210 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2211 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2212 		rxr = &adapter->rx_rings[i];
2213 		/* Set up some basics */
2214 		rxr->adapter = adapter;
2215 		rxr->me = i;
2216 		rxr->num_desc = adapter->num_rx_desc;
2217 
2218 		/* Initialize the RX side lock */
2219 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2220 		    device_get_nameunit(dev), rxr->me);
2221 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2222 
2223 		if (ixgbe_dma_malloc(adapter, rsize,
2224 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2225 			device_printf(dev,
2226 			    "Unable to allocate RX Descriptor memory\n");
2227 			error = ENOMEM;
2228 			goto err_rx_desc;
2229 		}
2230 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2231 		bzero((void *)rxr->rx_base, rsize);
2232 
2233 		/* Allocate receive buffers for the ring */
2234 		if (ixgbe_allocate_receive_buffers(rxr)) {
2235 			device_printf(dev,
2236 			    "Critical Failure setting up receive buffers\n");
2237 			error = ENOMEM;
2238 			goto err_rx_desc;
2239 		}
2240 	}
2241 
2242 	/*
2243 	** Finally set up the queue holding structs
2244 	*/
2245 	for (int i = 0; i < adapter->num_queues; i++) {
2246 		que = &adapter->queues[i];
2247 		que->adapter = adapter;
2248 		que->me = i;
2249 		que->txr = &adapter->tx_rings[i];
2250 		que->rxr = &adapter->rx_rings[i];
2251 	}
2252 
2253 	return (0);
2254 
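/*
** Error unwind: txconf/rxconf count how many rings had their descriptor
** areas allocated, so only those are freed here before releasing the
** ring and queue arrays themselves.
*/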
2255 err_rx_desc:
2256 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2257 		ixgbe_dma_free(adapter, &rxr->rxdma);
2258 err_tx_desc:
2259 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2260 		ixgbe_dma_free(adapter, &txr->txdma);
2261 	free(adapter->rx_rings, M_DEVBUF);
2262 rx_fail:
2263 	free(adapter->tx_rings, M_DEVBUF);
2264 tx_fail:
2265 	free(adapter->queues, M_DEVBUF);
2266 fail:
2267 	return (error);
2268 }
2269