xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision bd1da0a002e9a43cfb5220835c7a42804d90dc56)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also causes IP forwarding to fail,
62 **  and unlike LRO that cannot be
63 **  controlled by the stack. For all these
64 **  reasons it is left off by default and
65 **  there is no tunable interface; enabling
66 **  it requires changing this setting and
67 **  recompiling.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** for the filter pool; this means
75 ** for the filter pool, this means
76 ** every 20th packet will be probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83 
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88 	u16 value;
89 
90 	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91 	    reg, 2);
92 
93 	return (value);
94 }
95 
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100 	    reg, value, 2);
101 
102 	return;
103 }
104 
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int	ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112 
113 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
117 		    struct mbuf *, u32 *, u32 *);
118 static int	ixgbe_tso_setup(struct tx_ring *,
119 		    struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 		    struct mbuf *, u32);
126 
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  In case resources are not available, the stack is notified and
135  *  the packet is requeued.
136  **********************************************************************/
137 
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141 	struct mbuf    *m_head;
142 	struct adapter *adapter = txr->adapter;
143 
144 	IXGBE_TX_LOCK_ASSERT(txr);
145 
146 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147 		return;
148 	if (!adapter->link_active)
149 		return;
150 
151 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153 			break;
154 
155 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156 		if (m_head == NULL)
157 			break;
158 
159 		if (ixgbe_xmit(txr, &m_head)) {
160 			if (m_head != NULL)
161 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162 			break;
163 		}
164 		/* Send a copy of the frame to the BPF listener */
165 		ETHER_BPF_MTAP(ifp, m_head);
166 	}
167 	return;
168 }
169 
170 /*
171  * Legacy TX start - called by the stack, this
172  * always uses the first tx ring, and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178 	struct adapter *adapter = ifp->if_softc;
179 	struct tx_ring	*txr = adapter->tx_rings;
180 
181 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182 		IXGBE_TX_LOCK(txr);
183 		ixgbe_start_locked(txr, ifp);
184 		IXGBE_TX_UNLOCK(txr);
185 	}
186 	return;
187 }
188 
189 #else /* ! IXGBE_LEGACY_TX */
190 
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
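/*
** Note: ixgbe_mq_start below is normally installed as the ifnet's
** if_transmit method (with ixgbe_qflush as if_qflush) in the driver
** attach code. It enqueues the mbuf on the selected ring's buf_ring
** and either drains it immediately or defers to the queue's taskqueue.
*/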
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198 	struct adapter	*adapter = ifp->if_softc;
199 	struct ix_queue	*que;
200 	struct tx_ring	*txr;
201 	int 		i, err = 0;
202 #ifdef	RSS
203 	uint32_t bucket_id;
204 #endif
205 
206 	/*
207 	 * When doing RSS, map it to the same outbound queue
208 	 * as the incoming flow would be mapped to.
209 	 *
210 	 * If everything is set up correctly, it should be the
211 	 * same bucket as the one the current CPU is assigned to.
212 	 */
213 #if __FreeBSD_version < 1100054
214 	if (m->m_flags & M_FLOWID) {
215 #else
216 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 #endif
218 #ifdef	RSS
219 		if (rss_hash2bucket(m->m_pkthdr.flowid,
220 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
221 			/* TODO: spit out something if bucket_id > num_queues? */
222 			i = bucket_id % adapter->num_queues;
223 		else
224 #endif
225 			i = m->m_pkthdr.flowid % adapter->num_queues;
226 	} else
227 		i = curcpu % adapter->num_queues;
228 
229 	/* Check for a hung queue and pick alternative */
230 	if (((1 << i) & adapter->active_queues) == 0)
231 		i = ffsl(adapter->active_queues);
232 
233 	txr = &adapter->tx_rings[i];
234 	que = &adapter->queues[i];
235 
236 	err = drbr_enqueue(ifp, txr->br, m);
237 	if (err)
238 		return (err);
239 	if (IXGBE_TX_TRYLOCK(txr)) {
240 		ixgbe_mq_start_locked(ifp, txr);
241 		IXGBE_TX_UNLOCK(txr);
242 	} else
243 		taskqueue_enqueue(que->tq, &txr->txq_task);
244 
245 	return (0);
246 }
247 
248 int
249 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
250 {
251 	struct adapter  *adapter = txr->adapter;
252         struct mbuf     *next;
253         int             enqueued = 0, err = 0;
254 
255 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
256 	    adapter->link_active == 0)
257 		return (ENETDOWN);
258 
259 	/* Process the queue */
260 #if __FreeBSD_version < 901504
261 	next = drbr_dequeue(ifp, txr->br);
262 	while (next != NULL) {
263 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
264 			if (next != NULL)
265 				err = drbr_enqueue(ifp, txr->br, next);
266 #else
267 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
268 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
269 			if (next == NULL) {
270 				drbr_advance(ifp, txr->br);
271 			} else {
272 				drbr_putback(ifp, txr->br, next);
273 			}
274 #endif
275 			break;
276 		}
277 #if __FreeBSD_version >= 901504
278 		drbr_advance(ifp, txr->br);
279 #endif
280 		enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283 		/*
284 		 * Since we're looking at the tx ring, we can check
285 		 * to see if we're a VF by examining our tail register
286 		 * address.
287 		 */
288 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif
292 		/* Send a copy of the frame to the BPF listener */
293 		ETHER_BPF_MTAP(ifp, next);
294 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
295 			break;
296 #if __FreeBSD_version < 901504
297 		next = drbr_dequeue(ifp, txr->br);
298 #endif
299 	}
300 
301 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
302 		ixgbe_txeof(txr);
303 
304 	return (err);
305 }
306 
307 /*
308  * Called from a taskqueue to drain queued transmit packets.
309  */
310 void
311 ixgbe_deferred_mq_start(void *arg, int pending)
312 {
313 	struct tx_ring *txr = arg;
314 	struct adapter *adapter = txr->adapter;
315 	struct ifnet *ifp = adapter->ifp;
316 
317 	IXGBE_TX_LOCK(txr);
318 	if (!drbr_empty(ifp, txr->br))
319 		ixgbe_mq_start_locked(ifp, txr);
320 	IXGBE_TX_UNLOCK(txr);
321 }
322 
323 /*
324  * Flush all ring buffers
325  */
326 void
327 ixgbe_qflush(struct ifnet *ifp)
328 {
329 	struct adapter	*adapter = ifp->if_softc;
330 	struct tx_ring	*txr = adapter->tx_rings;
331 	struct mbuf	*m;
332 
333 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
334 		IXGBE_TX_LOCK(txr);
335 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
336 			m_freem(m);
337 		IXGBE_TX_UNLOCK(txr);
338 	}
339 	if_qflush(ifp);
340 }
341 #endif /* IXGBE_LEGACY_TX */
342 
343 
344 /*********************************************************************
345  *
346  *  This routine maps the mbufs to tx descriptors, allowing the
347  *  TX engine to transmit the packets.
348  *  	- return 0 on success, positive on failure
349  *
350  **********************************************************************/
351 
352 static int
353 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
354 {
355 	struct adapter  *adapter = txr->adapter;
356 	u32		olinfo_status = 0, cmd_type_len;
357 	int             i, j, error, nsegs;
358 	int		first;
359 	bool		remap = TRUE;
360 	struct mbuf	*m_head;
361 	bus_dma_segment_t segs[adapter->num_segs];
362 	bus_dmamap_t	map;
363 	struct ixgbe_tx_buf *txbuf;
364 	union ixgbe_adv_tx_desc *txd = NULL;
365 
366 	m_head = *m_headp;
367 
368 	/* Basic descriptor defines */
369         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
370 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
371 
372 	if (m_head->m_flags & M_VLANTAG)
373         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
374 
375         /*
376          * It is important to capture the first descriptor
377          * used, because its buffer entry will later record the
378          * EOP descriptor we tell the hardware to report back on.
379          */
380         first = txr->next_avail_desc;
381 	txbuf = &txr->tx_buffers[first];
382 	map = txbuf->map;
383 
384 	/*
385 	 * Map the packet for DMA.
386 	 */
387 retry:
388 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
389 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
390 
391 	if (__predict_false(error)) {
392 		struct mbuf *m;
393 
394 		switch (error) {
395 		case EFBIG:
396 			/* Try it again? - one try */
397 			if (remap == TRUE) {
398 				remap = FALSE;
399 				/*
400 				 * XXX: m_defrag will choke on
401 				 * non-MCLBYTES-sized clusters
402 				 */
403 				m = m_defrag(*m_headp, M_NOWAIT);
404 				if (m == NULL) {
405 					adapter->mbuf_defrag_failed++;
406 					m_freem(*m_headp);
407 					*m_headp = NULL;
408 					return (ENOBUFS);
409 				}
410 				*m_headp = m;
411 				goto retry;
412 			} else
413 				return (error);
414 		case ENOMEM:
415 			txr->no_tx_dma_setup++;
416 			return (error);
417 		default:
418 			txr->no_tx_dma_setup++;
419 			m_freem(*m_headp);
420 			*m_headp = NULL;
421 			return (error);
422 		}
423 	}
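	/*
	 * Summary of the error paths above: EFBIG gets a single m_defrag()
	 * retry; ENOMEM (and a second EFBIG) return with the mbuf intact so
	 * the caller may requeue it; any other error frees the chain and
	 * drops the frame.
	 */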
424 
425 	/* Make certain there are enough descriptors */
426 	if (nsegs > txr->tx_avail - 2) {
427 		txr->no_desc_avail++;
428 		bus_dmamap_unload(txr->txtag, map);
429 		return (ENOBUFS);
430 	}
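	/*
	 * The two-descriptor reserve above leaves room for the offload/TSO
	 * context descriptor consumed below plus one spare slot, so the
	 * ring is never driven completely full.
	 */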
431 	m_head = *m_headp;
432 
433 	/*
434 	 * Set up the appropriate offload context;
435 	 * this will consume the first descriptor.
436 	 */
437 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
438 	if (__predict_false(error)) {
439 		if (error == ENOBUFS)
440 			*m_headp = NULL;
441 		return (error);
442 	}
443 
444 #ifdef IXGBE_FDIR
445 	/* Do the flow director magic */
446 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
447 		++txr->atr_count;
448 		if (txr->atr_count >= atr_sample_rate) {
449 			ixgbe_atr(txr, m_head);
450 			txr->atr_count = 0;
451 		}
452 	}
453 #endif
454 
455 	i = txr->next_avail_desc;
456 	for (j = 0; j < nsegs; j++) {
457 		bus_size_t seglen;
458 		bus_addr_t segaddr;
459 
460 		txbuf = &txr->tx_buffers[i];
461 		txd = &txr->tx_base[i];
462 		seglen = segs[j].ds_len;
463 		segaddr = htole64(segs[j].ds_addr);
464 
465 		txd->read.buffer_addr = segaddr;
466 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
467 		    cmd_type_len | seglen);
468 		txd->read.olinfo_status = htole32(olinfo_status);
469 
470 		if (++i == txr->num_desc)
471 			i = 0;
472 	}
473 
474 	txd->read.cmd_type_len |=
475 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
476 	txr->tx_avail -= nsegs;
477 	txr->next_avail_desc = i;
478 
479 	txbuf->m_head = m_head;
480 	/*
481 	 * Here we swap the map so the last descriptor,
482 	 * which gets the completion interrupt, has the
483 	 * real map, and the first descriptor gets the
484 	 * unused map from this descriptor.
485 	 */
486 	txr->tx_buffers[first].map = txbuf->map;
487 	txbuf->map = map;
488 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
489 
490         /* Set the EOP descriptor that will be marked done */
491         txbuf = &txr->tx_buffers[first];
492 	txbuf->eop = txd;
493 
494         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
495             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
496 	/*
497 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
498 	 * hardware that this frame is available to transmit.
499 	 */
500 	++txr->total_packets;
501 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
502 
503 	/* Mark queue as having work */
504 	if (txr->busy == 0)
505 		txr->busy = 1;
506 
507 	return (0);
508 }
509 
510 
511 /*********************************************************************
512  *
513  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
514  *  the information needed to transmit a packet on the wire. This is
515  *  called only once at attach; setup is done on every reset.
516  *
517  **********************************************************************/
518 int
519 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
520 {
521 	struct adapter *adapter = txr->adapter;
522 	device_t dev = adapter->dev;
523 	struct ixgbe_tx_buf *txbuf;
524 	int error, i;
525 
526 	/*
527 	 * Setup DMA descriptor areas.
528 	 */
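	/*
	 * The tag below limits a single mapping to adapter->num_segs
	 * segments of at most PAGE_SIZE each, with IXGBE_TSO_SIZE as the
	 * total size, matching the segs[] array that ixgbe_xmit() sizes by
	 * adapter->num_segs.
	 */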
529 	if ((error = bus_dma_tag_create(
530 			       bus_get_dma_tag(adapter->dev),	/* parent */
531 			       1, 0,		/* alignment, bounds */
532 			       BUS_SPACE_MAXADDR,	/* lowaddr */
533 			       BUS_SPACE_MAXADDR,	/* highaddr */
534 			       NULL, NULL,		/* filter, filterarg */
535 			       IXGBE_TSO_SIZE,		/* maxsize */
536 			       adapter->num_segs,	/* nsegments */
537 			       PAGE_SIZE,		/* maxsegsize */
538 			       0,			/* flags */
539 			       NULL,			/* lockfunc */
540 			       NULL,			/* lockfuncarg */
541 			       &txr->txtag))) {
542 		device_printf(dev, "Unable to allocate TX DMA tag\n");
543 		goto fail;
544 	}
545 
546 	if (!(txr->tx_buffers =
547 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
548 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
549 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
550 		error = ENOMEM;
551 		goto fail;
552 	}
553 
554         /* Create the descriptor buffer dma maps */
555 	txbuf = txr->tx_buffers;
556 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
557 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
558 		if (error != 0) {
559 			device_printf(dev, "Unable to create TX DMA map\n");
560 			goto fail;
561 		}
562 	}
563 
564 	return 0;
565 fail:
566 	/* We free everything; this handles the case where we failed partway through */
567 	ixgbe_free_transmit_structures(adapter);
568 	return (error);
569 }
570 
571 /*********************************************************************
572  *
573  *  Initialize a transmit ring.
574  *
575  **********************************************************************/
576 static void
577 ixgbe_setup_transmit_ring(struct tx_ring *txr)
578 {
579 	struct adapter *adapter = txr->adapter;
580 	struct ixgbe_tx_buf *txbuf;
581 	int i;
582 #ifdef DEV_NETMAP
583 	struct netmap_adapter *na = NA(adapter->ifp);
584 	struct netmap_slot *slot;
585 #endif /* DEV_NETMAP */
586 
587 	/* Clear the old ring contents */
588 	IXGBE_TX_LOCK(txr);
589 #ifdef DEV_NETMAP
590 	/*
591 	 * (under lock): if in netmap mode, do some consistency
592 	 * checks and set slot to entry 0 of the netmap ring.
593 	 */
594 	slot = netmap_reset(na, NR_TX, txr->me, 0);
595 #endif /* DEV_NETMAP */
596 	bzero((void *)txr->tx_base,
597 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
598 	/* Reset indices */
599 	txr->next_avail_desc = 0;
600 	txr->next_to_clean = 0;
601 
602 	/* Free any existing tx buffers. */
603         txbuf = txr->tx_buffers;
604 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
605 		if (txbuf->m_head != NULL) {
606 			bus_dmamap_sync(txr->txtag, txbuf->map,
607 			    BUS_DMASYNC_POSTWRITE);
608 			bus_dmamap_unload(txr->txtag, txbuf->map);
609 			m_freem(txbuf->m_head);
610 			txbuf->m_head = NULL;
611 		}
612 #ifdef DEV_NETMAP
613 		/*
614 		 * In netmap mode, set the map for the packet buffer.
615 		 * NOTE: Some drivers (not this one) also need to set
616 		 * the physical buffer address in the NIC ring.
617 		 * Slots in the netmap ring (indexed by "si") are
618 		 * kring->nkr_hwofs positions "ahead" wrt the
619 		 * corresponding slot in the NIC ring. In some drivers
620 		 * (not here) nkr_hwofs can be negative. Function
621 		 * netmap_idx_n2k() handles wraparounds properly.
622 		 */
623 		if (slot) {
624 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
625 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
626 		}
627 #endif /* DEV_NETMAP */
628 		/* Clear the EOP descriptor pointer */
629 		txbuf->eop = NULL;
630         }
631 
632 #ifdef IXGBE_FDIR
633 	/* Set the rate at which we sample packets */
634 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
635 		txr->atr_sample = atr_sample_rate;
636 #endif
637 
638 	/* Set number of descriptors available */
639 	txr->tx_avail = adapter->num_tx_desc;
640 
641 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
642 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
643 	IXGBE_TX_UNLOCK(txr);
644 }
645 
646 /*********************************************************************
647  *
648  *  Initialize all transmit rings.
649  *
650  **********************************************************************/
651 int
652 ixgbe_setup_transmit_structures(struct adapter *adapter)
653 {
654 	struct tx_ring *txr = adapter->tx_rings;
655 
656 	for (int i = 0; i < adapter->num_queues; i++, txr++)
657 		ixgbe_setup_transmit_ring(txr);
658 
659 	return (0);
660 }
661 
662 /*********************************************************************
663  *
664  *  Free all transmit rings.
665  *
666  **********************************************************************/
667 void
668 ixgbe_free_transmit_structures(struct adapter *adapter)
669 {
670 	struct tx_ring *txr = adapter->tx_rings;
671 
672 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
673 		IXGBE_TX_LOCK(txr);
674 		ixgbe_free_transmit_buffers(txr);
675 		ixgbe_dma_free(adapter, &txr->txdma);
676 		IXGBE_TX_UNLOCK(txr);
677 		IXGBE_TX_LOCK_DESTROY(txr);
678 	}
679 	free(adapter->tx_rings, M_DEVBUF);
680 }
681 
682 /*********************************************************************
683  *
684  *  Free transmit ring related data structures.
685  *
686  **********************************************************************/
687 static void
688 ixgbe_free_transmit_buffers(struct tx_ring *txr)
689 {
690 	struct adapter *adapter = txr->adapter;
691 	struct ixgbe_tx_buf *tx_buffer;
692 	int             i;
693 
694 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
695 
696 	if (txr->tx_buffers == NULL)
697 		return;
698 
699 	tx_buffer = txr->tx_buffers;
700 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
701 		if (tx_buffer->m_head != NULL) {
702 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
703 			    BUS_DMASYNC_POSTWRITE);
704 			bus_dmamap_unload(txr->txtag,
705 			    tx_buffer->map);
706 			m_freem(tx_buffer->m_head);
707 			tx_buffer->m_head = NULL;
708 			if (tx_buffer->map != NULL) {
709 				bus_dmamap_destroy(txr->txtag,
710 				    tx_buffer->map);
711 				tx_buffer->map = NULL;
712 			}
713 		} else if (tx_buffer->map != NULL) {
714 			bus_dmamap_unload(txr->txtag,
715 			    tx_buffer->map);
716 			bus_dmamap_destroy(txr->txtag,
717 			    tx_buffer->map);
718 			tx_buffer->map = NULL;
719 		}
720 	}
721 #ifdef IXGBE_LEGACY_TX
722 	if (txr->br != NULL)
723 		buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725 	if (txr->tx_buffers != NULL) {
726 		free(txr->tx_buffers, M_DEVBUF);
727 		txr->tx_buffers = NULL;
728 	}
729 	if (txr->txtag != NULL) {
730 		bus_dma_tag_destroy(txr->txtag);
731 		txr->txtag = NULL;
732 	}
733 	return;
734 }
735 
736 /*********************************************************************
737  *
738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
739  *
740  **********************************************************************/
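/*
 * Background: an advanced context descriptor carries no packet data; it
 * programs the hardware with vlan_macip_lens (VLAN tag plus MAC and IP
 * header lengths) and type_tucmd_mlhl (IPv4/IPv6 and L4 protocol
 * selection) for the data descriptors that follow, and it costs one
 * ring slot, which is why tx_avail is decremented below.
 */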
741 
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744     u32 *cmd_type_len, u32 *olinfo_status)
745 {
746 	struct adapter *adapter = txr->adapter;
747 	struct ixgbe_adv_tx_context_desc *TXD;
748 	struct ether_vlan_header *eh;
749 	struct ip *ip;
750 	struct ip6_hdr *ip6;
751 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
752 	int	ehdrlen, ip_hlen = 0;
753 	u16	etype;
754 	u8	ipproto = 0;
755 	int	offload = TRUE;
756 	int	ctxd = txr->next_avail_desc;
757 	u16	vtag = 0;
758 
759 	/* First check if TSO is to be used */
760 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
761 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
762 
763 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
764 		offload = FALSE;
765 
766 	/* Indicate the whole packet as payload when not doing TSO */
767        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
768 
769 	/* Now ready a context descriptor */
770 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
771 
772 	/*
773 	** In advanced descriptors the vlan tag must
774 	** be placed into the context descriptor. Hence
775 	** we need to make one even if not doing offloads.
776 	*/
777 	if (mp->m_flags & M_VLANTAG) {
778 		vtag = htole16(mp->m_pkthdr.ether_vtag);
779 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
780 	}
781 	else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
782 		return (0);
783 
784 	/*
785 	 * Determine where frame payload starts.
786 	 * Jump over vlan headers if already present,
787 	 * helpful for QinQ too.
788 	 */
789 	eh = mtod(mp, struct ether_vlan_header *);
790 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
791 		etype = ntohs(eh->evl_proto);
792 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
793 	} else {
794 		etype = ntohs(eh->evl_encap_proto);
795 		ehdrlen = ETHER_HDR_LEN;
796 	}
797 
798 	/* Set the ether header length */
799 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
800 
801 	if (offload == FALSE)
802 		goto no_offloads;
803 
804 	switch (etype) {
805 		case ETHERTYPE_IP:
806 			ip = (struct ip *)(mp->m_data + ehdrlen);
807 			ip_hlen = ip->ip_hl << 2;
808 			ipproto = ip->ip_p;
809 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
810 			break;
811 		case ETHERTYPE_IPV6:
812 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
813 			ip_hlen = sizeof(struct ip6_hdr);
814 			/* XXX-BZ this will go badly in case of ext hdrs. */
815 			ipproto = ip6->ip6_nxt;
816 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
817 			break;
818 		default:
819 			offload = FALSE;
820 			break;
821 	}
822 
823 	vlan_macip_lens |= ip_hlen;
824 
825 	switch (ipproto) {
826 		case IPPROTO_TCP:
827 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
828 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
829 			break;
830 
831 		case IPPROTO_UDP:
832 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
833 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
834 			break;
835 
836 #if __FreeBSD_version >= 800000
837 		case IPPROTO_SCTP:
838 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
839 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
840 			break;
841 #endif
842 		default:
843 			offload = FALSE;
844 			break;
845 	}
846 
847 	if (offload) /* For the TX descriptor setup */
848 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
849 
850 no_offloads:
851 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
852 
853 	/* Now copy bits into descriptor */
854 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
855 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
856 	TXD->seqnum_seed = htole32(0);
857 	TXD->mss_l4len_idx = htole32(0);
858 
859 	/* We've consumed the first desc, adjust counters */
860 	if (++ctxd == txr->num_desc)
861 		ctxd = 0;
862 	txr->next_avail_desc = ctxd;
863 	--txr->tx_avail;
864 
865         return (0);
866 }
867 
868 /**********************************************************************
869  *
870  *  Setup work for hardware segmentation offload (TSO) on
871  *  adapters using advanced tx descriptors
872  *
873  **********************************************************************/
874 static int
875 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
876     u32 *cmd_type_len, u32 *olinfo_status)
877 {
878 	struct ixgbe_adv_tx_context_desc *TXD;
879 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
880 	u32 mss_l4len_idx = 0, paylen;
881 	u16 vtag = 0, eh_type;
882 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
883 	struct ether_vlan_header *eh;
884 #ifdef INET6
885 	struct ip6_hdr *ip6;
886 #endif
887 #ifdef INET
888 	struct ip *ip;
889 #endif
890 	struct tcphdr *th;
891 
892 
893 	/*
894 	 * Determine where frame payload starts.
895 	 * Jump over vlan headers if already present
896 	 */
897 	eh = mtod(mp, struct ether_vlan_header *);
898 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
899 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
900 		eh_type = eh->evl_proto;
901 	} else {
902 		ehdrlen = ETHER_HDR_LEN;
903 		eh_type = eh->evl_encap_proto;
904 	}
905 
906 	switch (ntohs(eh_type)) {
907 #ifdef INET6
908 	case ETHERTYPE_IPV6:
909 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
910 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
911 		if (ip6->ip6_nxt != IPPROTO_TCP)
912 			return (ENXIO);
913 		ip_hlen = sizeof(struct ip6_hdr);
914 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
916 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
918 		break;
919 #endif
920 #ifdef INET
921 	case ETHERTYPE_IP:
922 		ip = (struct ip *)(mp->m_data + ehdrlen);
923 		if (ip->ip_p != IPPROTO_TCP)
924 			return (ENXIO);
925 		ip->ip_sum = 0;
926 		ip_hlen = ip->ip_hl << 2;
927 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
928 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
929 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
930 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
931 		/* Tell transmit desc to also do IPv4 checksum. */
932 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
933 		break;
934 #endif
935 	default:
936 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
937 		    __func__, ntohs(eh_type));
938 		break;
939 	}
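	/*
	 * Note that the pseudo-header checksum seeded above intentionally
	 * omits the TCP length; the hardware computes and inserts the final
	 * checksum for each segment it generates.
	 */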
940 
941 	ctxd = txr->next_avail_desc;
942 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
943 
944 	tcp_hlen = th->th_off << 2;
945 
946 	/* This is used in the transmit desc in encap */
947 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
948 
949 	/* VLAN MACLEN IPLEN */
950 	if (mp->m_flags & M_VLANTAG) {
951 		vtag = htole16(mp->m_pkthdr.ether_vtag);
952                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
953 	}
954 
955 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
956 	vlan_macip_lens |= ip_hlen;
957 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
958 
959 	/* ADV DTYPE TUCMD */
960 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
961 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
962 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
963 
964 	/* MSS L4LEN IDX */
965 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
966 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
967 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
968 
969 	TXD->seqnum_seed = htole32(0);
970 
971 	if (++ctxd == txr->num_desc)
972 		ctxd = 0;
973 
974 	txr->tx_avail--;
975 	txr->next_avail_desc = ctxd;
976 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
977 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
978 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
979 	++txr->tso_tx;
980 	return (0);
981 }
982 
983 
984 /**********************************************************************
985  *
986  *  Examine each tx_buffer in the used queue. If the hardware is done
987  *  processing the packet then free associated resources. The
988  *  tx_buffer is put back on the free queue.
989  *
990  **********************************************************************/
991 void
992 ixgbe_txeof(struct tx_ring *txr)
993 {
994 #ifdef DEV_NETMAP
995 	struct adapter		*adapter = txr->adapter;
996 	struct ifnet		*ifp = adapter->ifp;
997 #endif
998 	u32			work, processed = 0;
999 	u16			limit = txr->process_limit;
1000 	struct ixgbe_tx_buf	*buf;
1001 	union ixgbe_adv_tx_desc *txd;
1002 
1003 	mtx_assert(&txr->tx_mtx, MA_OWNED);
1004 
1005 #ifdef DEV_NETMAP
1006 	if (ifp->if_capenable & IFCAP_NETMAP) {
1007 		struct netmap_adapter *na = NA(ifp);
1008 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1009 		txd = txr->tx_base;
1010 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1011 		    BUS_DMASYNC_POSTREAD);
1012 		/*
1013 		 * In netmap mode, all the work is done in the context
1014 		 * of the client thread. Interrupt handlers only wake up
1015 		 * clients, which may be sleeping on individual rings
1016 		 * or on a global resource for all rings.
1017 		 * To implement tx interrupt mitigation, we wake up the client
1018 		 * thread roughly every half ring, even if the NIC interrupts
1019 		 * more frequently. This is implemented as follows:
1020 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1021 		 *   the slot that should wake up the thread (nkr_num_slots
1022 		 *   means the user thread should not be woken up);
1023 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1024 		 *   or the slot has the DD bit set.
1025 		 */
1026 		if (!netmap_mitigate ||
1027 		    (kring->nr_kflags < kring->nkr_num_slots &&
1028 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1029 			netmap_tx_irq(ifp, txr->me);
1030 		}
1031 		return;
1032 	}
1033 #endif /* DEV_NETMAP */
1034 
1035 	if (txr->tx_avail == txr->num_desc) {
1036 		txr->busy = 0;
1037 		return;
1038 	}
1039 
1040 	/* Get work starting point */
1041 	work = txr->next_to_clean;
1042 	buf = &txr->tx_buffers[work];
1043 	txd = &txr->tx_base[work];
1044 	work -= txr->num_desc; /* The distance to ring end */
1045         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1046             BUS_DMASYNC_POSTREAD);
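	/*
	 * 'work' is kept as a negative offset from the end of the ring
	 * (next_to_clean - num_desc); incrementing it toward zero in the
	 * loop below makes the wrap test a simple check for !work.
	 */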
1047 
1048 	do {
1049 		union ixgbe_adv_tx_desc *eop = buf->eop;
1050 		if (eop == NULL) /* No work */
1051 			break;
1052 
1053 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1054 			break;	/* I/O not complete */
1055 
1056 		if (buf->m_head) {
1057 			txr->bytes +=
1058 			    buf->m_head->m_pkthdr.len;
1059 			bus_dmamap_sync(txr->txtag,
1060 			    buf->map,
1061 			    BUS_DMASYNC_POSTWRITE);
1062 			bus_dmamap_unload(txr->txtag,
1063 			    buf->map);
1064 			m_freem(buf->m_head);
1065 			buf->m_head = NULL;
1066 		}
1067 		buf->eop = NULL;
1068 		++txr->tx_avail;
1069 
1070 		/* We clean the range if multi segment */
1071 		while (txd != eop) {
1072 			++txd;
1073 			++buf;
1074 			++work;
1075 			/* wrap the ring? */
1076 			if (__predict_false(!work)) {
1077 				work -= txr->num_desc;
1078 				buf = txr->tx_buffers;
1079 				txd = txr->tx_base;
1080 			}
1081 			if (buf->m_head) {
1082 				txr->bytes +=
1083 				    buf->m_head->m_pkthdr.len;
1084 				bus_dmamap_sync(txr->txtag,
1085 				    buf->map,
1086 				    BUS_DMASYNC_POSTWRITE);
1087 				bus_dmamap_unload(txr->txtag,
1088 				    buf->map);
1089 				m_freem(buf->m_head);
1090 				buf->m_head = NULL;
1091 			}
1092 			++txr->tx_avail;
1093 			buf->eop = NULL;
1094 
1095 		}
1096 		++txr->packets;
1097 		++processed;
1098 
1099 		/* Try the next packet */
1100 		++txd;
1101 		++buf;
1102 		++work;
1103 		/* reset with a wrap */
1104 		if (__predict_false(!work)) {
1105 			work -= txr->num_desc;
1106 			buf = txr->tx_buffers;
1107 			txd = txr->tx_base;
1108 		}
1109 		prefetch(txd);
1110 	} while (__predict_true(--limit));
1111 
1112 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1113 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1114 
1115 	work += txr->num_desc;
1116 	txr->next_to_clean = work;
1117 
1118 	/*
1119 	** Queue hang detection: we know there is
1120 	** work outstanding or the first return
1121 	** above would have been taken, so increment
1122 	** busy if nothing managed to get cleaned;
1123 	** the local timer then checks this count and
1124 	** marks the queue HUNG if it exceeds a maximum.
1125 	*/
1126 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1127 		++txr->busy;
1128 	/*
1129 	** If anything gets cleaned we reset state to 1,
1130 	** note this will turn off HUNG if it is set.
1131 	*/
1132 	if (processed)
1133 		txr->busy = 1;
1134 
1135 	if (txr->tx_avail == txr->num_desc)
1136 		txr->busy = 0;
1137 
1138 	return;
1139 }
1140 
1141 
1142 #ifdef IXGBE_FDIR
1143 /*
1144 ** This routine parses packet headers so that Flow
1145 ** Director can make a hashed filter table entry
1146 ** allowing traffic flows to be identified and kept
1147 ** on the same CPU.  This could be a performance
1148 ** hit, but we only sample one packet in every
1149 ** atr_sample_rate packets.
1150 */
1151 static void
1152 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1153 {
1154 	struct adapter			*adapter = txr->adapter;
1155 	struct ix_queue			*que;
1156 	struct ip			*ip;
1157 	struct tcphdr			*th;
1158 	struct udphdr			*uh;
1159 	struct ether_vlan_header	*eh;
1160 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1161 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1162 	int  				ehdrlen, ip_hlen;
1163 	u16				etype;
1164 
1165 	eh = mtod(mp, struct ether_vlan_header *);
1166 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1167 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1168 		etype = eh->evl_proto;
1169 	} else {
1170 		ehdrlen = ETHER_HDR_LEN;
1171 		etype = eh->evl_encap_proto;
1172 	}
1173 
1174 	/* Only handling IPv4 */
1175 	if (etype != htons(ETHERTYPE_IP))
1176 		return;
1177 
1178 	ip = (struct ip *)(mp->m_data + ehdrlen);
1179 	ip_hlen = ip->ip_hl << 2;
1180 
1181 	/* check if we're UDP or TCP */
1182 	switch (ip->ip_p) {
1183 	case IPPROTO_TCP:
1184 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1185 		/* src and dst are inverted */
1186 		common.port.dst ^= th->th_sport;
1187 		common.port.src ^= th->th_dport;
1188 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1189 		break;
1190 	case IPPROTO_UDP:
1191 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1192 		/* src and dst are inverted */
1193 		common.port.dst ^= uh->uh_sport;
1194 		common.port.src ^= uh->uh_dport;
1195 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1196 		break;
1197 	default:
1198 		return;
1199 	}
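	/*
	 * The source/destination swap above is intentional: the signature
	 * filter is programmed from a transmitted packet but must match the
	 * reverse (receive) direction of the flow, so returning traffic is
	 * steered to this queue.
	 */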
1200 
1201 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1202 	if (mp->m_pkthdr.ether_vtag)
1203 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1204 	else
1205 		common.flex_bytes ^= etype;
1206 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1207 
1208 	que = &adapter->queues[txr->me];
1209 	/*
1210 	** This assumes the Rx queue and Tx
1211 	** queue are bound to the same CPU
1212 	*/
1213 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1214 	    input, common, que->msix);
1215 }
1216 #endif /* IXGBE_FDIR */
1217 
1218 /*
1219 ** Return the RSC count from a descriptor; used to
1220 ** detect frames that Hardware RSC has merged.
1221 */
1222 static inline u32
1223 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1224 {
1225 	return (le32toh(rx->wb.lower.lo_dword.data) &
1226 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1227 }
1228 
1229 /*********************************************************************
1230  *
1231  *  Initialize Hardware RSC (LRO) feature on 82599
1232  *  for an RX ring; this is toggled by the LRO capability
1233  *  even though it is transparent to the stack.
1234  *
1235  *  NOTE: since this HW feature only works with IPv4 and
1236  *  our testing has shown soft LRO to be as effective,
1237  *  it is disabled by default.
1238  *
1239  **********************************************************************/
1240 static void
1241 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1242 {
1243 	struct	adapter 	*adapter = rxr->adapter;
1244 	struct	ixgbe_hw	*hw = &adapter->hw;
1245 	u32			rscctrl, rdrxctl;
1246 
1247 	/* If turning LRO/RSC off we need to disable it */
1248 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1249 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1250 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back to the hardware */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1251 		return;
1252 	}
1253 
1254 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1255 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1256 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1257 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1258 #endif /* DEV_NETMAP */
1259 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1260 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1261 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1262 
1263 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1264 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1265 	/*
1266 	** Limit the total number of descriptors that
1267 	** can be combined, so it does not exceed 64K
1268 	*/
1269 	if (rxr->mbuf_sz == MCLBYTES)
1270 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1271 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1272 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1273 	else if (rxr->mbuf_sz == MJUM9BYTES)
1274 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1275 	else  /* Using 16K cluster */
1276 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
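	/*
	 * For example, 16 merged descriptors of 2K clusters (32K) or 8 of
	 * page-sized jumbo clusters (typically 4K each, 32K total) keep a
	 * coalesced frame comfortably under the 64K cap noted above.
	 */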
1277 
1278 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1279 
1280 	/* Enable TCP header recognition */
1281 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1282 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1283 	    IXGBE_PSRTYPE_TCPHDR));
1284 
1285 	/* Disable RSC for ACK packets */
1286 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1287 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1288 
1289 	rxr->hw_rsc = TRUE;
1290 }
1291 /*********************************************************************
1292  *
1293  *  Refresh mbuf buffers for RX descriptor rings
1294  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1295  *     it just returns, keeping its placeholder, so it can simply
1296  *     be called again later to retry.
1297  *     be recalled to try again.
1298  *
1299  **********************************************************************/
1300 static void
1301 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1302 {
1303 	struct adapter		*adapter = rxr->adapter;
1304 	bus_dma_segment_t	seg[1];
1305 	struct ixgbe_rx_buf	*rxbuf;
1306 	struct mbuf		*mp;
1307 	int			i, j, nsegs, error;
1308 	bool			refreshed = FALSE;
1309 
1310 	i = j = rxr->next_to_refresh;
1311 	/* Control the loop with one beyond */
1312 	if (++j == rxr->num_desc)
1313 		j = 0;
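	/*
	 * 'i' is the slot being refreshed while 'j' runs one position
	 * ahead; stopping when 'j' reaches 'limit' keeps the refresh from
	 * overrunning descriptors the hardware has not handed back yet.
	 */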
1314 
1315 	while (j != limit) {
1316 		rxbuf = &rxr->rx_buffers[i];
1317 		if (rxbuf->buf == NULL) {
1318 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1319 			    M_PKTHDR, rxr->mbuf_sz);
1320 			if (mp == NULL)
1321 				goto update;
1322 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1323 				m_adj(mp, ETHER_ALIGN);
1324 		} else
1325 			mp = rxbuf->buf;
1326 
1327 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1328 
1329 		/* If we're dealing with an mbuf that was copied rather
1330 		 * than replaced, there's no need to go through busdma.
1331 		 */
1332 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1333 			/* Get the memory mapping */
1334 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1335 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1336 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1337 			if (error != 0) {
1338 				printf("Refresh mbufs: payload dmamap load"
1339 				    " failure - %d\n", error);
1340 				m_free(mp);
1341 				rxbuf->buf = NULL;
1342 				goto update;
1343 			}
1344 			rxbuf->buf = mp;
1345 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1346 			    BUS_DMASYNC_PREREAD);
1347 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1348 			    htole64(seg[0].ds_addr);
1349 		} else {
1350 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1351 			rxbuf->flags &= ~IXGBE_RX_COPY;
1352 		}
1353 
1354 		refreshed = TRUE;
1355 		/* Next is precalculated */
1356 		i = j;
1357 		rxr->next_to_refresh = i;
1358 		if (++j == rxr->num_desc)
1359 			j = 0;
1360 	}
1361 update:
1362 	if (refreshed) /* Update hardware tail index */
1363 		IXGBE_WRITE_REG(&adapter->hw,
1364 		    rxr->tail, rxr->next_to_refresh);
1365 	return;
1366 }
1367 
1368 /*********************************************************************
1369  *
1370  *  Allocate memory for rx_buffer structures. Since we use one
1371  *  rx_buffer per received packet, the maximum number of rx_buffer's
1372  *  that we'll need is equal to the number of receive descriptors
1373  *  that we've allocated.
1374  *
1375  **********************************************************************/
1376 int
1377 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1378 {
1379 	struct	adapter 	*adapter = rxr->adapter;
1380 	device_t 		dev = adapter->dev;
1381 	struct ixgbe_rx_buf 	*rxbuf;
1382 	int             	i, bsize, error;
1383 
1384 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1385 	if (!(rxr->rx_buffers =
1386 	    (struct ixgbe_rx_buf *) malloc(bsize,
1387 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1388 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1389 		error = ENOMEM;
1390 		goto fail;
1391 	}
1392 
1393 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1394 				   1, 0,	/* alignment, bounds */
1395 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1396 				   BUS_SPACE_MAXADDR,	/* highaddr */
1397 				   NULL, NULL,		/* filter, filterarg */
1398 				   MJUM16BYTES,		/* maxsize */
1399 				   1,			/* nsegments */
1400 				   MJUM16BYTES,		/* maxsegsize */
1401 				   0,			/* flags */
1402 				   NULL,		/* lockfunc */
1403 				   NULL,		/* lockfuncarg */
1404 				   &rxr->ptag))) {
1405 		device_printf(dev, "Unable to create RX DMA tag\n");
1406 		goto fail;
1407 	}
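	/*
	 * Each RX descriptor points at a single cluster, so the tag above
	 * allows exactly one segment, sized (MJUM16BYTES) to cover the
	 * largest cluster this driver will ever post.
	 */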
1408 
1409 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1410 		rxbuf = &rxr->rx_buffers[i];
1411 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1412 		if (error) {
1413 			device_printf(dev, "Unable to create RX dma map\n");
1414 			goto fail;
1415 		}
1416 	}
1417 
1418 	return (0);
1419 
1420 fail:
1421 	/* Frees all, but can handle partial completion */
1422 	ixgbe_free_receive_structures(adapter);
1423 	return (error);
1424 }
1425 
1426 
1427 static void
1428 ixgbe_free_receive_ring(struct rx_ring *rxr)
1429 {
1430 	struct ixgbe_rx_buf       *rxbuf;
1431 	int i;
1432 
1433 	for (i = 0; i < rxr->num_desc; i++) {
1434 		rxbuf = &rxr->rx_buffers[i];
1435 		if (rxbuf->buf != NULL) {
1436 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1437 			    BUS_DMASYNC_POSTREAD);
1438 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1439 			rxbuf->buf->m_flags |= M_PKTHDR;
1440 			m_freem(rxbuf->buf);
1441 			rxbuf->buf = NULL;
1442 			rxbuf->flags = 0;
1443 		}
1444 	}
1445 }
1446 
1447 
1448 /*********************************************************************
1449  *
1450  *  Initialize a receive ring and its buffers.
1451  *
1452  **********************************************************************/
1453 static int
1454 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1455 {
1456 	struct	adapter 	*adapter;
1457 	struct ifnet		*ifp;
1458 	device_t		dev;
1459 	struct ixgbe_rx_buf	*rxbuf;
1460 	bus_dma_segment_t	seg[1];
1461 	struct lro_ctrl		*lro = &rxr->lro;
1462 	int			rsize, nsegs, error = 0;
1463 #ifdef DEV_NETMAP
1464 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1465 	struct netmap_slot *slot;
1466 #endif /* DEV_NETMAP */
1467 
1468 	adapter = rxr->adapter;
1469 	ifp = adapter->ifp;
1470 	dev = adapter->dev;
1471 
1472 	/* Clear the ring contents */
1473 	IXGBE_RX_LOCK(rxr);
1474 #ifdef DEV_NETMAP
1475 	/* same as in ixgbe_setup_transmit_ring() */
1476 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1477 #endif /* DEV_NETMAP */
1478 	rsize = roundup2(adapter->num_rx_desc *
1479 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1480 	bzero((void *)rxr->rx_base, rsize);
1481 	/* Cache the size */
1482 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1483 
1484 	/* Free current RX buffer structs and their mbufs */
1485 	ixgbe_free_receive_ring(rxr);
1486 
1487 	/* Now replenish the mbufs */
1488 	for (int j = 0; j != rxr->num_desc; ++j) {
1489 		struct mbuf	*mp;
1490 
1491 		rxbuf = &rxr->rx_buffers[j];
1492 #ifdef DEV_NETMAP
1493 		/*
1494 		 * In netmap mode, fill the map and set the buffer
1495 		 * address in the NIC ring, considering the offset
1496 		 * between the netmap and NIC rings (see comment in
1497 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1498 		 * an mbuf, so end the block with a continue;
1499 		 */
1500 		if (slot) {
1501 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1502 			uint64_t paddr;
1503 			void *addr;
1504 
1505 			addr = PNMB(na, slot + sj, &paddr);
1506 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1507 			/* Update descriptor and the cached value */
1508 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1509 			rxbuf->addr = htole64(paddr);
1510 			continue;
1511 		}
1512 #endif /* DEV_NETMAP */
1513 		rxbuf->flags = 0;
1514 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1515 		    M_PKTHDR, adapter->rx_mbuf_sz);
1516 		if (rxbuf->buf == NULL) {
1517 			error = ENOBUFS;
1518                         goto fail;
1519 		}
1520 		mp = rxbuf->buf;
1521 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1522 		/* Get the memory mapping */
1523 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1524 		    rxbuf->pmap, mp, seg,
1525 		    &nsegs, BUS_DMA_NOWAIT);
1526 		if (error != 0)
1527                         goto fail;
1528 		bus_dmamap_sync(rxr->ptag,
1529 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1530 		/* Update the descriptor and the cached value */
1531 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1532 		rxbuf->addr = htole64(seg[0].ds_addr);
1533 	}
1534 
1535 
1536 	/* Setup our descriptor indices */
1537 	rxr->next_to_check = 0;
1538 	rxr->next_to_refresh = 0;
1539 	rxr->lro_enabled = FALSE;
1540 	rxr->rx_copies = 0;
1541 	rxr->rx_bytes = 0;
1542 	rxr->vtag_strip = FALSE;
1543 
1544 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1545 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1546 
1547 	/*
1548 	** Now set up the LRO interface:
1549 	*/
1550 	if (ixgbe_rsc_enable)
1551 		ixgbe_setup_hw_rsc(rxr);
1552 	else if (ifp->if_capenable & IFCAP_LRO) {
1553 		int err = tcp_lro_init(lro);
1554 		if (err) {
1555 			device_printf(dev, "LRO Initialization failed!\n");
1556 			goto fail;
1557 		}
1558 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1559 		rxr->lro_enabled = TRUE;
1560 		lro->ifp = adapter->ifp;
1561 	}
1562 
1563 	IXGBE_RX_UNLOCK(rxr);
1564 	return (0);
1565 
1566 fail:
1567 	ixgbe_free_receive_ring(rxr);
1568 	IXGBE_RX_UNLOCK(rxr);
1569 	return (error);
1570 }
1571 
1572 /*********************************************************************
1573  *
1574  *  Initialize all receive rings.
1575  *
1576  **********************************************************************/
1577 int
1578 ixgbe_setup_receive_structures(struct adapter *adapter)
1579 {
1580 	struct rx_ring *rxr = adapter->rx_rings;
1581 	int j;
1582 
1583 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1584 		if (ixgbe_setup_receive_ring(rxr))
1585 			goto fail;
1586 
1587 	return (0);
1588 fail:
1589 	/*
1590 	 * Free RX buffers allocated so far; we will only handle
1591 	 * the rings that completed, as the failing case will have
1592 	 * cleaned up after itself. 'j' failed, so it is the terminus.
1593 	 */
1594 	for (int i = 0; i < j; ++i) {
1595 		rxr = &adapter->rx_rings[i];
1596 		ixgbe_free_receive_ring(rxr);
1597 	}
1598 
1599 	return (ENOBUFS);
1600 }
1601 
1602 
1603 /*********************************************************************
1604  *
1605  *  Free all receive rings.
1606  *
1607  **********************************************************************/
1608 void
1609 ixgbe_free_receive_structures(struct adapter *adapter)
1610 {
1611 	struct rx_ring *rxr = adapter->rx_rings;
1612 
1613 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1614 
1615 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1616 		struct lro_ctrl		*lro = &rxr->lro;
1617 		ixgbe_free_receive_buffers(rxr);
1618 		/* Free LRO memory */
1619 		tcp_lro_free(lro);
1620 		/* Free the ring memory as well */
1621 		ixgbe_dma_free(adapter, &rxr->rxdma);
1622 	}
1623 
1624 	free(adapter->rx_rings, M_DEVBUF);
1625 }
1626 
1627 
1628 /*********************************************************************
1629  *
1630  *  Free receive ring data structures
1631  *
1632  **********************************************************************/
1633 void
1634 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1635 {
1636 	struct adapter		*adapter = rxr->adapter;
1637 	struct ixgbe_rx_buf	*rxbuf;
1638 
1639 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1640 
1641 	/* Cleanup any existing buffers */
1642 	if (rxr->rx_buffers != NULL) {
1643 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1644 			rxbuf = &rxr->rx_buffers[i];
1645 			if (rxbuf->buf != NULL) {
1646 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1647 				    BUS_DMASYNC_POSTREAD);
1648 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1649 				rxbuf->buf->m_flags |= M_PKTHDR;
1650 				m_freem(rxbuf->buf);
1651 			}
1652 			rxbuf->buf = NULL;
1653 			if (rxbuf->pmap != NULL) {
1654 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1655 				rxbuf->pmap = NULL;
1656 			}
1657 		}
1658 		if (rxr->rx_buffers != NULL) {
1659 			free(rxr->rx_buffers, M_DEVBUF);
1660 			rxr->rx_buffers = NULL;
1661 		}
1662 	}
1663 
1664 	if (rxr->ptag != NULL) {
1665 		bus_dma_tag_destroy(rxr->ptag);
1666 		rxr->ptag = NULL;
1667 	}
1668 
1669 	return;
1670 }
1671 
1672 static __inline void
1673 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1674 {
1675 
1676         /*
1677          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1678          * has been computed by hardware, and the frame must not carry a VLAN
1679          * tag in the ethernet header.  For IPv6 we do not yet support ext. hdrs.
1680          */
1681         if (rxr->lro_enabled &&
1682             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1683             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1684             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1685             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1686             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1687             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1688             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1689             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1690                 /*
1691                  * Send to the stack if:
1692                  **  - LRO not enabled, or
1693                  **  - no LRO resources, or
1694                  **  - lro enqueue fails
1695                  */
1696                 if (rxr->lro.lro_cnt != 0)
1697                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1698                                 return;
1699         }
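	/*
	 * The RX lock is dropped around if_input() so this queue's mutex is
	 * not held while the stack processes the packet; this avoids
	 * lock-order issues if the stack transmits or frees in-line.
	 */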
1700 	IXGBE_RX_UNLOCK(rxr);
1701         (*ifp->if_input)(ifp, m);
1702 	IXGBE_RX_LOCK(rxr);
1703 }
1704 
1705 static __inline void
1706 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1707 {
1708 	struct ixgbe_rx_buf	*rbuf;
1709 
1710 	rbuf = &rxr->rx_buffers[i];
1711 
1712 
1713 	/*
1714 	** With advanced descriptors the writeback
1715 	** clobbers the buffer addrs, so it is easier
1716 	** to just free the existing mbufs and take
1717 	** the normal refresh path to get new buffers
1718 	** and mapping.
1719 	*/
1720 
1721 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1722 		rbuf->fmp->m_flags |= M_PKTHDR;
1723 		m_freem(rbuf->fmp);
1724 		rbuf->fmp = NULL;
1725 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1726 	} else if (rbuf->buf) {
1727 		m_free(rbuf->buf);
1728 		rbuf->buf = NULL;
1729 	}
1730 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1731 
1732 	rbuf->flags = 0;
1733 
1734 	return;
1735 }
1736 
1737 
1738 /*********************************************************************
1739  *
1740  *  This routine executes in interrupt context. It replenishes
1741  *  the mbufs in the descriptor and sends data which has been
1742  *  dma'ed into host memory to upper layer.
1743  *
1744  *  Return TRUE for more work, FALSE for all clean.
1745  *********************************************************************/
1746 bool
1747 ixgbe_rxeof(struct ix_queue *que)
1748 {
1749 	struct adapter		*adapter = que->adapter;
1750 	struct rx_ring		*rxr = que->rxr;
1751 	struct ifnet		*ifp = adapter->ifp;
1752 	struct lro_ctrl		*lro = &rxr->lro;
1753 	struct lro_entry	*queued;
1754 	int			i, nextp, processed = 0;
1755 	u32			staterr = 0;
1756 	u16			count = rxr->process_limit;
1757 	union ixgbe_adv_rx_desc	*cur;
1758 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1759 	u16			pkt_info;
1760 
1761 	IXGBE_RX_LOCK(rxr);
1762 
1763 #ifdef DEV_NETMAP
1764 	/* Same as the txeof routine: wakeup clients on intr. */
1765 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1766 		IXGBE_RX_UNLOCK(rxr);
1767 		return (FALSE);
1768 	}
1769 #endif /* DEV_NETMAP */
1770 
1771 	for (i = rxr->next_to_check; count != 0;) {
1772 		struct mbuf	*sendmp, *mp;
1773 		u32		rsc, ptype;
1774 		u16		len;
1775 		u16		vtag = 0;
1776 		bool		eop;
1777 
1778 		/* Sync the ring. */
1779 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1780 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1781 
1782 		cur = &rxr->rx_base[i];
1783 		staterr = le32toh(cur->wb.upper.status_error);
1784 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1785 
1786 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1787 			break;
1788 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1789 			break;
1790 
1791 		count--;
1792 		sendmp = NULL;
1793 		nbuf = NULL;
1794 		rsc = 0;
1795 		cur->wb.upper.status_error = 0;
1796 		rbuf = &rxr->rx_buffers[i];
1797 		mp = rbuf->buf;
1798 
1799 		len = le16toh(cur->wb.upper.length);
1800 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1801 		    IXGBE_RXDADV_PKTTYPE_MASK;
1802 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1803 
1804 		/* Make sure bad packets are discarded */
1805 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1806 #if __FreeBSD_version >= 1100036
1807 			if (IXGBE_IS_VF(adapter))
1808 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1809 #endif
1810 			rxr->rx_discarded++;
1811 			ixgbe_rx_discard(rxr, i);
1812 			goto next_desc;
1813 		}
1814 
1815 		/*
1816 		** On 82599, which supports a hardware
1817 		** LRO (called HW RSC), packets need
1818 		** not be fragmented across sequential
1819 		** descriptors; rather, the next descriptor
1820 		** is indicated in bits of this descriptor.
1821 		** This also means that we might process
1822 		** more than one packet at a time, something
1823 		** that had never been true before; it
1824 		** required eliminating global chain pointers
1825 		** in favor of what we are doing here.  -jfv
1826 		*/
1827 		if (!eop) {
1828 			/*
1829 			** Figure out the next descriptor
1830 			** of this frame.
1831 			*/
1832 			if (rxr->hw_rsc == TRUE) {
1833 				rsc = ixgbe_rsc_count(cur);
1834 				rxr->rsc_num += (rsc - 1);
1835 			}
1836 			if (rsc) { /* Get hardware index */
1837 				nextp = ((staterr &
1838 				    IXGBE_RXDADV_NEXTP_MASK) >>
1839 				    IXGBE_RXDADV_NEXTP_SHIFT);
1840 			} else { /* Just sequential */
1841 				nextp = i + 1;
1842 				if (nextp == adapter->num_rx_desc)
1843 					nextp = 0;
1844 			}
1845 			nbuf = &rxr->rx_buffers[nextp];
1846 			prefetch(nbuf);
1847 		}
1848 		/*
1849 		** Rather than using the fmp/lmp global pointers
1850 		** we now keep the head of a packet chain in the
1851 		** buffer struct and pass this along from one
1852 		** descriptor to the next, until we get EOP.
1853 		*/
1854 		mp->m_len = len;
1855 		/*
1856 		** See if there is a stored head;
1857 		** if so, this is a continuation fragment.
1858 		*/
1859 		sendmp = rbuf->fmp;
1860 		if (sendmp != NULL) {  /* secondary frag */
1861 			rbuf->buf = rbuf->fmp = NULL;
1862 			mp->m_flags &= ~M_PKTHDR;
1863 			sendmp->m_pkthdr.len += mp->m_len;
1864 		} else {
1865 			/*
1866 			 * Optimize.  This might be a small packet,
1867 			 * maybe just a TCP ACK.  Do a fast copy that
1868 			 * is cache aligned into a new mbuf, and
1869 			 * leave the old mbuf+cluster for re-use.
1870 			 */
1871 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1872 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1873 				if (sendmp != NULL) {
1874 					sendmp->m_data +=
1875 					    IXGBE_RX_COPY_ALIGN;
1876 					ixgbe_bcopy(mp->m_data,
1877 					    sendmp->m_data, len);
1878 					sendmp->m_len = len;
1879 					rxr->rx_copies++;
1880 					rbuf->flags |= IXGBE_RX_COPY;
1881 				}
1882 			}
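			/*
			 * If the copy was not done (frame too large or
			 * header mbuf allocation failed), hand the receive
			 * cluster itself up the stack; the refresh path
			 * will replace it.
			 */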
1883 			if (sendmp == NULL) {
1884 				rbuf->buf = rbuf->fmp = NULL;
1885 				sendmp = mp;
1886 			}
1887 
1888 			/* first desc of a new (non packet-split) chain */
1889 			sendmp->m_flags |= M_PKTHDR;
1890 			sendmp->m_pkthdr.len = mp->m_len;
1891 		}
1892 		++processed;
1893 
1894 		/* Pass the head pointer on */
1895 		if (eop == 0) {
1896 			nbuf->fmp = sendmp;
1897 			sendmp = NULL;
1898 			mp->m_next = nbuf->buf;
1899 		} else { /* Sending this frame */
1900 			sendmp->m_pkthdr.rcvif = ifp;
1901 			rxr->rx_packets++;
1902 			/* capture data for AIM */
1903 			rxr->bytes += sendmp->m_pkthdr.len;
1904 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1905 			/* Process vlan info */
1906 			if ((rxr->vtag_strip) &&
1907 			    (staterr & IXGBE_RXD_STAT_VP))
1908 				vtag = le16toh(cur->wb.upper.vlan);
1909 			if (vtag) {
1910 				sendmp->m_pkthdr.ether_vtag = vtag;
1911 				sendmp->m_flags |= M_VLANTAG;
1912 			}
1913 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1914 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1915 #if __FreeBSD_version >= 800000
1916 #ifdef RSS
1917 			sendmp->m_pkthdr.flowid =
1918 			    le32toh(cur->wb.lower.hi_dword.rss);
1919 #if __FreeBSD_version < 1100054
1920 			sendmp->m_flags |= M_FLOWID;
1921 #endif
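			/*
			 * Map the hardware-reported RSS type to the stack's
			 * mbuf hash type so the hash can be used upstream,
			 * e.g. to preserve flow affinity.
			 */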
1922 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1923 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1924 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1925 				break;
1926 			case IXGBE_RXDADV_RSSTYPE_IPV4:
1927 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1928 				break;
1929 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1930 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1931 				break;
1932 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1933 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1934 				break;
1935 			case IXGBE_RXDADV_RSSTYPE_IPV6:
1936 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1937 				break;
1938 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1939 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1940 				break;
1941 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1942 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1943 				break;
1944 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1945 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1946 				break;
1947 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1948 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1949 				break;
1950 			default:
1951 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1952 			}
1953 #else /* RSS */
1954 			sendmp->m_pkthdr.flowid = que->msix;
1955 #if __FreeBSD_version >= 1100054
1956 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1957 #else
1958 			sendmp->m_flags |= M_FLOWID;
1959 #endif
1960 #endif /* RSS */
1961 #endif /* FreeBSD_version */
1962 		}
1963 next_desc:
1964 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1965 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966 
1967 		/* Advance our pointers to the next descriptor. */
1968 		if (++i == rxr->num_desc)
1969 			i = 0;
1970 
1971 		/* Now send to the stack or do LRO */
1972 		if (sendmp != NULL) {
1973 			rxr->next_to_check = i;
1974 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1975 			i = rxr->next_to_check;
1976 		}
1977 
1978 		/* Every 8 descriptors we go to refresh mbufs */
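		/*
		 * Batching the refresh amortizes the per-packet overhead
		 * (the refresh path ends with a hardware tail-pointer
		 * update); any shortfall is made up after the loop.
		 */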
1979 		if (processed == 8) {
1980 			ixgbe_refresh_mbufs(rxr, i);
1981 			processed = 0;
1982 		}
1983 	}
1984 
1985 	/* Refresh any remaining buf structs */
1986 	if (ixgbe_rx_unrefreshed(rxr))
1987 		ixgbe_refresh_mbufs(rxr, i);
1988 
1989 	rxr->next_to_check = i;
1990 
1991 	/*
1992 	 * Flush any outstanding LRO work
1993 	 */
1994 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1995 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1996 		tcp_lro_flush(lro, queued);
1997 	}
1998 
1999 	IXGBE_RX_UNLOCK(rxr);
2000 
2001 	/*
2002 	** Still have cleaning to do?
2003 	*/
2004 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2005 		return (TRUE);
2006 	else
2007 		return (FALSE);
2008 }
2009 
2010 
2011 /*********************************************************************
2012  *
2013  *  Verify that the hardware indicated that the checksum is valid.
2014  *  Inform the stack about the status of the checksum so that the
2015  *  stack doesn't spend time verifying it.
2016  *
2017  *********************************************************************/
2018 static void
2019 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2020 {
2021 	u16	status = (u16) staterr;
2022 	u8	errors = (u8) (staterr >> 24);
2023 	bool	sctp = FALSE;
2024 
2025 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2026 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2027 		sctp = TRUE;
2028 
2029 	if (status & IXGBE_RXD_STAT_IPCS) {
2030 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2031 			/* IP Checksum Good */
2032 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2033 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2034 
2035 		} else
2036 			mp->m_pkthdr.csum_flags = 0;
2037 	}
2038 	if (status & IXGBE_RXD_STAT_L4CS) {
2039 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2040 #if __FreeBSD_version >= 800000
2041 		if (sctp)
2042 			type = CSUM_SCTP_VALID;
2043 #endif
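		/*
		 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data set to
		 * 0xffff tells the stack the L4 checksum (including the
		 * pseudo-header) has already been verified, so it is
		 * skipped in software.
		 */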
2044 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2045 			mp->m_pkthdr.csum_flags |= type;
2046 			if (!sctp)
2047 				mp->m_pkthdr.csum_data = htons(0xffff);
2048 		}
2049 	}
2050 	return;
2051 }
2052 
2053 /********************************************************************
2054  * Manage DMA'able memory.
2055  *******************************************************************/
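/*
 * bus_dmamap_load() callback: stash the bus address of the first (and
 * only) segment in the caller-provided bus_addr_t; do nothing on error.
 */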
2056 static void
2057 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2058 {
2059 	if (error)
2060 		return;
2061 	*(bus_addr_t *) arg = segs->ds_addr;
2062 	return;
2063 }
2064 
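/*
 * Allocate a DMA-safe memory area of the given size: create a tag
 * aligned to DBA_ALIGN, allocate and map the memory, and record the
 * kernel virtual and bus addresses in *dma.  Partial state is unwound
 * through the fail_* labels on error.
 */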
2065 int
2066 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2067 		struct ixgbe_dma_alloc *dma, int mapflags)
2068 {
2069 	device_t dev = adapter->dev;
2070 	int             r;
2071 
2072 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2073 			       DBA_ALIGN, 0,	/* alignment, bounds */
2074 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2075 			       BUS_SPACE_MAXADDR,	/* highaddr */
2076 			       NULL, NULL,	/* filter, filterarg */
2077 			       size,	/* maxsize */
2078 			       1,	/* nsegments */
2079 			       size,	/* maxsegsize */
2080 			       BUS_DMA_ALLOCNOW,	/* flags */
2081 			       NULL,	/* lockfunc */
2082 			       NULL,	/* lockfuncarg */
2083 			       &dma->dma_tag);
2084 	if (r != 0) {
2085 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2086 		       "error %u\n", r);
2087 		goto fail_0;
2088 	}
2089 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2090 			     BUS_DMA_NOWAIT, &dma->dma_map);
2091 	if (r != 0) {
2092 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2093 		       "error %u\n", r);
2094 		goto fail_1;
2095 	}
2096 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2097 			    size,
2098 			    ixgbe_dmamap_cb,
2099 			    &dma->dma_paddr,
2100 			    mapflags | BUS_DMA_NOWAIT);
2101 	if (r != 0) {
2102 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2103 		       "error %u\n", r);
2104 		goto fail_2;
2105 	}
2106 	dma->dma_size = size;
2107 	return (0);
2108 fail_2:
2109 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2110 fail_1:
2111 	bus_dma_tag_destroy(dma->dma_tag);
2112 fail_0:
2113 	dma->dma_tag = NULL;
2114 	return (r);
2115 }
2116 
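/*
 * Tear down a DMA area created by ixgbe_dma_malloc(): sync, unload the
 * map, free the memory, and destroy the tag, in reverse order of setup.
 */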
2117 void
2118 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2119 {
2120 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2121 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2122 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2123 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2124 	bus_dma_tag_destroy(dma->dma_tag);
2125 }
2126 
2127 
2128 /*********************************************************************
2129  *
2130  *  Allocate memory for the transmit and receive rings, and then
2131  *  the descriptors associated with each, called only once at attach.
2132  *
2133  **********************************************************************/
2134 int
2135 ixgbe_allocate_queues(struct adapter *adapter)
2136 {
2137 	device_t	dev = adapter->dev;
2138 	struct ix_queue	*que;
2139 	struct tx_ring	*txr;
2140 	struct rx_ring	*rxr;
2141 	int rsize, tsize, error = IXGBE_SUCCESS;
2142 	int txconf = 0, rxconf = 0;
2143 
2144 	/* First allocate the top level queue structs */
2145 	if (!(adapter->queues =
2146 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2147 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2148 		device_printf(dev, "Unable to allocate queue memory\n");
2149 		error = ENOMEM;
2150 		goto fail;
2151 	}
2152 
2153 	/* Next allocate the TX ring struct memory */
2154 	if (!(adapter->tx_rings =
2155 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2156 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2157 		device_printf(dev, "Unable to allocate TX ring memory\n");
2158 		error = ENOMEM;
2159 		goto tx_fail;
2160 	}
2161 
2162 	/* Next allocate the RX */
2163 	if (!(adapter->rx_rings =
2164 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2165 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2166 		device_printf(dev, "Unable to allocate RX ring memory\n");
2167 		error = ENOMEM;
2168 		goto rx_fail;
2169 	}
2170 
2171 	/* For the ring itself */
2172 	tsize = roundup2(adapter->num_tx_desc *
2173 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2174 
2175 	/*
2176 	 * Now set up the TX queues, txconf is needed to handle the
2177 	 * possibility that things fail midcourse and we need to
2178 	 * undo memory gracefully
2179 	 */
2180 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2181 		/* Set up some basics */
2182 		txr = &adapter->tx_rings[i];
2183 		txr->adapter = adapter;
2184 		txr->me = i;
2185 		txr->num_desc = adapter->num_tx_desc;
2186 
2187 		/* Initialize the TX side lock */
2188 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2189 		    device_get_nameunit(dev), txr->me);
2190 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2191 
2192 		if (ixgbe_dma_malloc(adapter, tsize,
2193 			&txr->txdma, BUS_DMA_NOWAIT)) {
2194 			device_printf(dev,
2195 			    "Unable to allocate TX Descriptor memory\n");
2196 			error = ENOMEM;
2197 			goto err_tx_desc;
2198 		}
2199 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2200 		bzero((void *)txr->tx_base, tsize);
2201 
2202         	/* Now allocate transmit buffers for the ring */
2203         	if (ixgbe_allocate_transmit_buffers(txr)) {
2204 			device_printf(dev,
2205 			    "Critical Failure setting up transmit buffers\n");
2206 			error = ENOMEM;
2207 			goto err_tx_desc;
2208         	}
2209 #ifndef IXGBE_LEGACY_TX
2210 		/* Allocate a buf ring */
2211 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2212 		    M_WAITOK, &txr->tx_mtx);
2213 		if (txr->br == NULL) {
2214 			device_printf(dev,
2215 			    "Critical Failure setting up buf ring\n");
2216 			error = ENOMEM;
2217 			goto err_tx_desc;
2218         	}
2219 #endif
2220 	}
2221 
2222 	/*
2223 	 * Next the RX queues...
2224 	 */
2225 	rsize = roundup2(adapter->num_rx_desc *
2226 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2227 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2228 		rxr = &adapter->rx_rings[i];
2229 		/* Set up some basics */
2230 		rxr->adapter = adapter;
2231 		rxr->me = i;
2232 		rxr->num_desc = adapter->num_rx_desc;
2233 
2234 		/* Initialize the RX side lock */
2235 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2236 		    device_get_nameunit(dev), rxr->me);
2237 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2238 
2239 		if (ixgbe_dma_malloc(adapter, rsize,
2240 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2241 			device_printf(dev,
2242 			    "Unable to allocate RX Descriptor memory\n");
2243 			error = ENOMEM;
2244 			goto err_rx_desc;
2245 		}
2246 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2247 		bzero((void *)rxr->rx_base, rsize);
2248 
2249         	/* Allocate receive buffers for the ring*/
2250 		if (ixgbe_allocate_receive_buffers(rxr)) {
2251 			device_printf(dev,
2252 			    "Critical Failure setting up receive buffers\n");
2253 			error = ENOMEM;
2254 			goto err_rx_desc;
2255 		}
2256 	}
2257 
2258 	/*
2259 	** Finally set up the queue holding structs
2260 	*/
2261 	for (int i = 0; i < adapter->num_queues; i++) {
2262 		que = &adapter->queues[i];
2263 		que->adapter = adapter;
2264 		que->me = i;
2265 		que->txr = &adapter->tx_rings[i];
2266 		que->rxr = &adapter->rx_rings[i];
2267 	}
2268 
2269 	return (0);
2270 
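/*
 * Error unwind: release any descriptor DMA areas already set up, then
 * free the ring arrays and the queue array in reverse order of
 * allocation.
 */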
2271 err_rx_desc:
2272 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2273 		ixgbe_dma_free(adapter, &rxr->rxdma);
2274 err_tx_desc:
2275 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2276 		ixgbe_dma_free(adapter, &txr->txdma);
2277 	free(adapter->rx_rings, M_DEVBUF);
2278 rx_fail:
2279 	free(adapter->tx_rings, M_DEVBUF);
2280 tx_fail:
2281 	free(adapter->queues, M_DEVBUF);
2282 fail:
2283 	return (error);
2284 }
2285