xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  Also, this will cause IP forwarding to
62 **  fail, and unlike LRO that cannot be
63 **  controlled by the stack. For all these
64 **  reasons I've deemed it best to leave
65 **  this off and not bother with a tunable
66 **  interface; enabling it requires
67 **  recompiling with this set to TRUE.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** number of TX packets we sample
75 ** for the filter pool; this means
76 ** every 20th packet will be probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83 
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88 	u16 value;
89 
90 	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91 	    reg, 2);
92 
93 	return (value);
94 }
95 
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100 	    reg, value, 2);
101 
102 	return;
103 }
104 
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int	ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112 
113 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
117 		    struct mbuf *, u32 *, u32 *);
118 static int	ixgbe_tso_setup(struct tx_ring *,
119 		    struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 		    struct mbuf *, u32);
126 
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  If resources are not available, the stack is notified and
135  *  the packet is requeued.
136  **********************************************************************/
137 
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141 	struct mbuf    *m_head;
142 	struct adapter *adapter = txr->adapter;
143 
144 	IXGBE_TX_LOCK_ASSERT(txr);
145 
146 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147 		return;
148 	if (!adapter->link_active)
149 		return;
150 
151 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153 			break;
154 
155 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156 		if (m_head == NULL)
157 			break;
158 
159 		if (ixgbe_xmit(txr, &m_head)) {
160 			if (m_head != NULL)
161 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162 			break;
163 		}
164 		/* Send a copy of the frame to the BPF listener */
165 		ETHER_BPF_MTAP(ifp, m_head);
166 	}
167 	return;
168 }
169 
170 /*
171  * Legacy TX start - called by the stack; this
172  * always uses the first tx ring and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178 	struct adapter *adapter = ifp->if_softc;
179 	struct tx_ring	*txr = adapter->tx_rings;
180 
181 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182 		IXGBE_TX_LOCK(txr);
183 		ixgbe_start_locked(txr, ifp);
184 		IXGBE_TX_UNLOCK(txr);
185 	}
186 	return;
187 }
188 
189 #else /* ! IXGBE_LEGACY_TX */
190 
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198 	struct adapter	*adapter = ifp->if_softc;
199 	struct ix_queue	*que;
200 	struct tx_ring	*txr;
201 	int 		i, err = 0;
202 #ifdef	RSS
203 	uint32_t bucket_id;
204 #endif
205 
206 	/*
207 	 * When doing RSS, map it to the same outbound queue
208 	 * as the incoming flow would be mapped to.
209 	 *
210 	 * If everything is set up correctly, it should be the
211 	 * same bucket the CPU we are currently running on maps to.
212 	 */
213 #if __FreeBSD_version < 1100054
214 	if (m->m_flags & M_FLOWID) {
215 #else
216 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 #endif
218 #ifdef	RSS
219 		if (rss_hash2bucket(m->m_pkthdr.flowid,
220 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
221 			/* TODO: spit out something if bucket_id > num_queues? */
222 			i = bucket_id % adapter->num_queues;
223 		else
224 #endif
225 			i = m->m_pkthdr.flowid % adapter->num_queues;
226 	} else
227 		i = curcpu % adapter->num_queues;
228 
229 	/* Check for a hung queue and pick alternative */
230 	if (((1 << i) & adapter->active_queues) == 0)
231 		i = ffsl(adapter->active_queues);
232 
233 	txr = &adapter->tx_rings[i];
234 	que = &adapter->queues[i];
235 
236 	err = drbr_enqueue(ifp, txr->br, m);
237 	if (err)
238 		return (err);
239 	if (IXGBE_TX_TRYLOCK(txr)) {
240 		ixgbe_mq_start_locked(ifp, txr);
241 		IXGBE_TX_UNLOCK(txr);
242 	} else
243 		taskqueue_enqueue(que->tq, &txr->txq_task);
244 
245 	return (0);
246 }
247 
248 int
249 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
250 {
251 	struct adapter  *adapter = txr->adapter;
252         struct mbuf     *next;
253         int             enqueued = 0, err = 0;
254 
255 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
256 	    adapter->link_active == 0)
257 		return (ENETDOWN);
258 
259 	/* Process the queue */
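	/*
	 * On newer kernels (__FreeBSD_version >= 901504) we peek at the
	 * head of the buf_ring and only advance past a packet once
	 * ixgbe_xmit() has accepted it; on failure the mbuf is put back,
	 * or the slot is advanced if ixgbe_xmit() already freed it.
	 * Older kernels dequeue up front and re-enqueue on failure.
	 */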
260 #if __FreeBSD_version < 901504
261 	next = drbr_dequeue(ifp, txr->br);
262 	while (next != NULL) {
263 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
264 			if (next != NULL)
265 				err = drbr_enqueue(ifp, txr->br, next);
266 #else
267 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
268 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
269 			if (next == NULL) {
270 				drbr_advance(ifp, txr->br);
271 			} else {
272 				drbr_putback(ifp, txr->br, next);
273 			}
274 #endif
275 			break;
276 		}
277 #if __FreeBSD_version >= 901504
278 		drbr_advance(ifp, txr->br);
279 #endif
280 		enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283 		/*
284 		 * Since we're looking at the tx ring, we can check
285 		 * to see if we're a VF by examining our tail register
286 		 * address.
287 		 */
288 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif
292 		/* Send a copy of the frame to the BPF listener */
293 		ETHER_BPF_MTAP(ifp, next);
294 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
295 			break;
296 #if __FreeBSD_version < 901504
297 		next = drbr_dequeue(ifp, txr->br);
298 #endif
299 	}
300 
301 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
302 		ixgbe_txeof(txr);
303 
304 	return (err);
305 }
306 
307 /*
308  * Called from a taskqueue to drain queued transmit packets.
309  */
310 void
311 ixgbe_deferred_mq_start(void *arg, int pending)
312 {
313 	struct tx_ring *txr = arg;
314 	struct adapter *adapter = txr->adapter;
315 	struct ifnet *ifp = adapter->ifp;
316 
317 	IXGBE_TX_LOCK(txr);
318 	if (!drbr_empty(ifp, txr->br))
319 		ixgbe_mq_start_locked(ifp, txr);
320 	IXGBE_TX_UNLOCK(txr);
321 }
322 
323 /*
324  * Flush all ring buffers
325  */
326 void
327 ixgbe_qflush(struct ifnet *ifp)
328 {
329 	struct adapter	*adapter = ifp->if_softc;
330 	struct tx_ring	*txr = adapter->tx_rings;
331 	struct mbuf	*m;
332 
333 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
334 		IXGBE_TX_LOCK(txr);
335 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
336 			m_freem(m);
337 		IXGBE_TX_UNLOCK(txr);
338 	}
339 	if_qflush(ifp);
340 }
341 #endif /* IXGBE_LEGACY_TX */
342 
343 
344 /*********************************************************************
345  *
346  *  This routine maps the mbufs to tx descriptors, allowing the
347  *  TX engine to transmit the packets.
348  *  	- return 0 on success, positive on failure
349  *
350  **********************************************************************/
351 
352 static int
353 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
354 {
355 	struct adapter  *adapter = txr->adapter;
356 	u32		olinfo_status = 0, cmd_type_len;
357 	int             i, j, error, nsegs;
358 	int		first;
359 	bool		remap = TRUE;
360 	struct mbuf	*m_head;
361 	bus_dma_segment_t segs[adapter->num_segs];
362 	bus_dmamap_t	map;
363 	struct ixgbe_tx_buf *txbuf;
364 	union ixgbe_adv_tx_desc *txd = NULL;
365 
366 	m_head = *m_headp;
367 
368 	/* Basic descriptor defines */
369         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
370 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
371 
372 	if (m_head->m_flags & M_VLANTAG)
373         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
374 
375         /*
376          * It is important to capture the first descriptor
377          * used, because its tx_buffer will hold the pointer
378          * to the EOP descriptor the hardware reports back on.
379          */
380         first = txr->next_avail_desc;
381 	txbuf = &txr->tx_buffers[first];
382 	map = txbuf->map;
383 
384 	/*
385 	 * Map the packet for DMA.
386 	 */
387 retry:
388 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
389 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
390 
391 	if (__predict_false(error)) {
392 		struct mbuf *m;
393 
394 		switch (error) {
395 		case EFBIG:
396 			/* Try it again? - one try */
397 			if (remap == TRUE) {
398 				remap = FALSE;
399 				/*
400 				 * XXX: m_defrag will choke on
401 				 * non-MCLBYTES-sized clusters
402 				 */
403 				m = m_defrag(*m_headp, M_NOWAIT);
404 				if (m == NULL) {
405 					adapter->mbuf_defrag_failed++;
406 					m_freem(*m_headp);
407 					*m_headp = NULL;
408 					return (ENOBUFS);
409 				}
410 				*m_headp = m;
411 				goto retry;
412 			} else
413 				return (error);
414 		case ENOMEM:
415 			txr->no_tx_dma_setup++;
416 			return (error);
417 		default:
418 			txr->no_tx_dma_setup++;
419 			m_freem(*m_headp);
420 			*m_headp = NULL;
421 			return (error);
422 		}
423 	}
424 
425 	/* Make certain there are enough descriptors */
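	/*
	 * The "- 2" leaves headroom for the offload context descriptor
	 * set up below and keeps at least one descriptor free so the
	 * ring is never driven completely full.
	 */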
426 	if (nsegs > txr->tx_avail - 2) {
427 		txr->no_desc_avail++;
428 		bus_dmamap_unload(txr->txtag, map);
429 		return (ENOBUFS);
430 	}
431 	m_head = *m_headp;
432 
433 	/*
434 	 * Set up the appropriate offload context;
435 	 * this will consume the first descriptor.
436 	 */
437 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
438 	if (__predict_false(error)) {
439 		if (error == ENOBUFS)
440 			*m_headp = NULL;
441 		return (error);
442 	}
443 
444 #ifdef IXGBE_FDIR
445 	/* Do the flow director magic */
446 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
447 		++txr->atr_count;
448 		if (txr->atr_count >= atr_sample_rate) {
449 			ixgbe_atr(txr, m_head);
450 			txr->atr_count = 0;
451 		}
452 	}
453 #endif
454 
455 	i = txr->next_avail_desc;
456 	for (j = 0; j < nsegs; j++) {
457 		bus_size_t seglen;
458 		bus_addr_t segaddr;
459 
460 		txbuf = &txr->tx_buffers[i];
461 		txd = &txr->tx_base[i];
462 		seglen = segs[j].ds_len;
463 		segaddr = htole64(segs[j].ds_addr);
464 
465 		txd->read.buffer_addr = segaddr;
466 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
467 		    cmd_type_len |seglen);
468 		txd->read.olinfo_status = htole32(olinfo_status);
469 
470 		if (++i == txr->num_desc)
471 			i = 0;
472 	}
473 
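	/*
	 * EOP marks the final descriptor of the frame; RS requests a
	 * status writeback, which ixgbe_txeof() later detects via the
	 * DD bit on the saved eop descriptor.
	 */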
474 	txd->read.cmd_type_len |=
475 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
476 	txr->tx_avail -= nsegs;
477 	txr->next_avail_desc = i;
478 
479 	txbuf->m_head = m_head;
480 	/*
481 	 * Here we swap the map so the last descriptor,
482 	 * which gets the completion interrupt, has the
483 	 * real map, and the first descriptor gets the
484 	 * now-unused map from the last one.
485 	 */
486 	txr->tx_buffers[first].map = txbuf->map;
487 	txbuf->map = map;
488 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
489 
490         /* Set the EOP descriptor that will be marked done */
491         txbuf = &txr->tx_buffers[first];
492 	txbuf->eop = txd;
493 
494         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
495             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
496 	/*
497 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
498 	 * hardware that this frame is available to transmit.
499 	 */
500 	++txr->total_packets;
501 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
502 
503 	/* Mark queue as having work */
504 	if (txr->busy == 0)
505 		txr->busy = 1;
506 
507 	return (0);
508 }
509 
510 
511 /*********************************************************************
512  *
513  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
514  *  the information needed to transmit a packet on the wire. This is
515  *  called only once at attach; setup is done on every reset.
516  *
517  **********************************************************************/
518 int
519 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
520 {
521 	struct adapter *adapter = txr->adapter;
522 	device_t dev = adapter->dev;
523 	struct ixgbe_tx_buf *txbuf;
524 	int error, i;
525 
526 	/*
527 	 * Setup DMA descriptor areas.
528 	 */
529 	if ((error = bus_dma_tag_create(
530 			       bus_get_dma_tag(adapter->dev),	/* parent */
531 			       1, 0,		/* alignment, bounds */
532 			       BUS_SPACE_MAXADDR,	/* lowaddr */
533 			       BUS_SPACE_MAXADDR,	/* highaddr */
534 			       NULL, NULL,		/* filter, filterarg */
535 			       IXGBE_TSO_SIZE,		/* maxsize */
536 			       adapter->num_segs,	/* nsegments */
537 			       PAGE_SIZE,		/* maxsegsize */
538 			       0,			/* flags */
539 			       NULL,			/* lockfunc */
540 			       NULL,			/* lockfuncarg */
541 			       &txr->txtag))) {
542 		device_printf(dev,"Unable to allocate TX DMA tag\n");
543 		goto fail;
544 	}
545 
546 	if (!(txr->tx_buffers =
547 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
548 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
549 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
550 		error = ENOMEM;
551 		goto fail;
552 	}
553 
554         /* Create the descriptor buffer dma maps */
555 	txbuf = txr->tx_buffers;
556 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
557 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
558 		if (error != 0) {
559 			device_printf(dev, "Unable to create TX DMA map\n");
560 			goto fail;
561 		}
562 	}
563 
564 	return 0;
565 fail:
566 	/* Free everything; this handles the case where we failed part way through */
567 	ixgbe_free_transmit_structures(adapter);
568 	return (error);
569 }
570 
571 /*********************************************************************
572  *
573  *  Initialize a transmit ring.
574  *
575  **********************************************************************/
576 static void
577 ixgbe_setup_transmit_ring(struct tx_ring *txr)
578 {
579 	struct adapter *adapter = txr->adapter;
580 	struct ixgbe_tx_buf *txbuf;
581 #ifdef DEV_NETMAP
582 	struct netmap_adapter *na = NA(adapter->ifp);
583 	struct netmap_slot *slot;
584 #endif /* DEV_NETMAP */
585 
586 	/* Clear the old ring contents */
587 	IXGBE_TX_LOCK(txr);
588 #ifdef DEV_NETMAP
589 	/*
590 	 * (under lock): if in netmap mode, do some consistency
591 	 * checks and set slot to entry 0 of the netmap ring.
592 	 */
593 	slot = netmap_reset(na, NR_TX, txr->me, 0);
594 #endif /* DEV_NETMAP */
595 	bzero((void *)txr->tx_base,
596 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
597 	/* Reset indices */
598 	txr->next_avail_desc = 0;
599 	txr->next_to_clean = 0;
600 
601 	/* Free any existing tx buffers. */
602         txbuf = txr->tx_buffers;
603 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
604 		if (txbuf->m_head != NULL) {
605 			bus_dmamap_sync(txr->txtag, txbuf->map,
606 			    BUS_DMASYNC_POSTWRITE);
607 			bus_dmamap_unload(txr->txtag, txbuf->map);
608 			m_freem(txbuf->m_head);
609 			txbuf->m_head = NULL;
610 		}
611 #ifdef DEV_NETMAP
612 		/*
613 		 * In netmap mode, set the map for the packet buffer.
614 		 * NOTE: Some drivers (not this one) also need to set
615 		 * the physical buffer address in the NIC ring.
616 		 * Slots in the netmap ring (indexed by "si") are
617 		 * kring->nkr_hwofs positions "ahead" wrt the
618 		 * corresponding slot in the NIC ring. In some drivers
619 		 * (not here) nkr_hwofs can be negative. Function
620 		 * netmap_idx_n2k() handles wraparounds properly.
621 		 */
622 		if (slot) {
623 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
624 			netmap_load_map(na, txr->txtag,
625 			    txbuf->map, NMB(na, slot + si));
626 		}
627 #endif /* DEV_NETMAP */
628 		/* Clear the EOP descriptor pointer */
629 		txbuf->eop = NULL;
630         }
631 
632 #ifdef IXGBE_FDIR
633 	/* Set the rate at which we sample packets */
634 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
635 		txr->atr_sample = atr_sample_rate;
636 #endif
637 
638 	/* Set number of descriptors available */
639 	txr->tx_avail = adapter->num_tx_desc;
640 
641 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
642 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
643 	IXGBE_TX_UNLOCK(txr);
644 }
645 
646 /*********************************************************************
647  *
648  *  Initialize all transmit rings.
649  *
650  **********************************************************************/
651 int
652 ixgbe_setup_transmit_structures(struct adapter *adapter)
653 {
654 	struct tx_ring *txr = adapter->tx_rings;
655 
656 	for (int i = 0; i < adapter->num_queues; i++, txr++)
657 		ixgbe_setup_transmit_ring(txr);
658 
659 	return (0);
660 }
661 
662 /*********************************************************************
663  *
664  *  Free all transmit rings.
665  *
666  **********************************************************************/
667 void
668 ixgbe_free_transmit_structures(struct adapter *adapter)
669 {
670 	struct tx_ring *txr = adapter->tx_rings;
671 
672 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
673 		IXGBE_TX_LOCK(txr);
674 		ixgbe_free_transmit_buffers(txr);
675 		ixgbe_dma_free(adapter, &txr->txdma);
676 		IXGBE_TX_UNLOCK(txr);
677 		IXGBE_TX_LOCK_DESTROY(txr);
678 	}
679 	free(adapter->tx_rings, M_DEVBUF);
680 }
681 
682 /*********************************************************************
683  *
684  *  Free transmit ring related data structures.
685  *
686  **********************************************************************/
687 static void
688 ixgbe_free_transmit_buffers(struct tx_ring *txr)
689 {
690 	struct adapter *adapter = txr->adapter;
691 	struct ixgbe_tx_buf *tx_buffer;
692 	int             i;
693 
694 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
695 
696 	if (txr->tx_buffers == NULL)
697 		return;
698 
699 	tx_buffer = txr->tx_buffers;
700 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
701 		if (tx_buffer->m_head != NULL) {
702 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
703 			    BUS_DMASYNC_POSTWRITE);
704 			bus_dmamap_unload(txr->txtag,
705 			    tx_buffer->map);
706 			m_freem(tx_buffer->m_head);
707 			tx_buffer->m_head = NULL;
708 			if (tx_buffer->map != NULL) {
709 				bus_dmamap_destroy(txr->txtag,
710 				    tx_buffer->map);
711 				tx_buffer->map = NULL;
712 			}
713 		} else if (tx_buffer->map != NULL) {
714 			bus_dmamap_unload(txr->txtag,
715 			    tx_buffer->map);
716 			bus_dmamap_destroy(txr->txtag,
717 			    tx_buffer->map);
718 			tx_buffer->map = NULL;
719 		}
720 	}
721 #ifdef IXGBE_LEGACY_TX
722 	if (txr->br != NULL)
723 		buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725 	if (txr->tx_buffers != NULL) {
726 		free(txr->tx_buffers, M_DEVBUF);
727 		txr->tx_buffers = NULL;
728 	}
729 	if (txr->txtag != NULL) {
730 		bus_dma_tag_destroy(txr->txtag);
731 		txr->txtag = NULL;
732 	}
733 	return;
734 }
735 
736 /*********************************************************************
737  *
738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
739  *
740  **********************************************************************/
741 
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744     u32 *cmd_type_len, u32 *olinfo_status)
745 {
746 	struct adapter *adapter = txr->adapter;
747 	struct ixgbe_adv_tx_context_desc *TXD;
748 	struct ether_vlan_header *eh;
749 	struct ip *ip;
750 	struct ip6_hdr *ip6;
751 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
752 	int	ehdrlen, ip_hlen = 0;
753 	u16	etype;
754 	u8	ipproto = 0;
755 	int	offload = TRUE;
756 	int	ctxd = txr->next_avail_desc;
757 	u16	vtag = 0;
758 
759 	/* First check if TSO is to be used */
760 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
761 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
762 
763 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
764 		offload = FALSE;
765 
766 	/* Indicate the whole packet as payload when not doing TSO */
767        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
768 
769 	/* Now ready a context descriptor */
770 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
771 
772 	/*
773 	** In advanced descriptors the vlan tag must
774 	** be placed into the context descriptor. Hence
775 	** we need to make one even if not doing offloads.
776 	*/
777 	if (mp->m_flags & M_VLANTAG) {
778 		vtag = htole16(mp->m_pkthdr.ether_vtag);
779 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
780 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
781 		return (0);
782 
783 	/*
784 	 * Determine where frame payload starts.
785 	 * Jump over vlan headers if already present,
786 	 * helpful for QinQ too.
787 	 */
788 	eh = mtod(mp, struct ether_vlan_header *);
789 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
790 		etype = ntohs(eh->evl_proto);
791 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
792 	} else {
793 		etype = ntohs(eh->evl_encap_proto);
794 		ehdrlen = ETHER_HDR_LEN;
795 	}
796 
797 	/* Set the ether header length */
798 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
799 
800 	if (offload == FALSE)
801 		goto no_offloads;
802 
803 	switch (etype) {
804 		case ETHERTYPE_IP:
805 			ip = (struct ip *)(mp->m_data + ehdrlen);
806 			ip_hlen = ip->ip_hl << 2;
807 			ipproto = ip->ip_p;
808 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
809 			break;
810 		case ETHERTYPE_IPV6:
811 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
812 			ip_hlen = sizeof(struct ip6_hdr);
813 			/* XXX-BZ this will go badly in case of ext hdrs. */
814 			ipproto = ip6->ip6_nxt;
815 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
816 			break;
817 		default:
818 			offload = FALSE;
819 			break;
820 	}
821 
822 	vlan_macip_lens |= ip_hlen;
823 
824 	switch (ipproto) {
825 		case IPPROTO_TCP:
826 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
827 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
828 			break;
829 
830 		case IPPROTO_UDP:
831 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
832 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
833 			break;
834 
835 #if __FreeBSD_version >= 800000
836 		case IPPROTO_SCTP:
837 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
838 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
839 			break;
840 #endif
841 		default:
842 			offload = FALSE;
843 			break;
844 	}
845 
846 	if (offload) /* For the TX descriptor setup */
847 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
848 
849 no_offloads:
850 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
851 
852 	/* Now copy bits into descriptor */
853 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
854 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
855 	TXD->seqnum_seed = htole32(0);
856 	TXD->mss_l4len_idx = htole32(0);
857 
858 	/* We've consumed the first desc, adjust counters */
859 	if (++ctxd == txr->num_desc)
860 		ctxd = 0;
861 	txr->next_avail_desc = ctxd;
862 	--txr->tx_avail;
863 
864         return (0);
865 }
866 
867 /**********************************************************************
868  *
869  *  Setup work for hardware segmentation offload (TSO) on
870  *  adapters using advanced tx descriptors
871  *
872  **********************************************************************/
873 static int
874 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
875     u32 *cmd_type_len, u32 *olinfo_status)
876 {
877 	struct ixgbe_adv_tx_context_desc *TXD;
878 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
879 	u32 mss_l4len_idx = 0, paylen;
880 	u16 vtag = 0, eh_type;
881 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
882 	struct ether_vlan_header *eh;
883 #ifdef INET6
884 	struct ip6_hdr *ip6;
885 #endif
886 #ifdef INET
887 	struct ip *ip;
888 #endif
889 	struct tcphdr *th;
890 
891 
892 	/*
893 	 * Determine where frame payload starts.
894 	 * Jump over vlan headers if already present
895 	 */
896 	eh = mtod(mp, struct ether_vlan_header *);
897 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
898 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
899 		eh_type = eh->evl_proto;
900 	} else {
901 		ehdrlen = ETHER_HDR_LEN;
902 		eh_type = eh->evl_encap_proto;
903 	}
904 
905 	switch (ntohs(eh_type)) {
906 #ifdef INET6
907 	case ETHERTYPE_IPV6:
908 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
909 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
910 		if (ip6->ip6_nxt != IPPROTO_TCP)
911 			return (ENXIO);
912 		ip_hlen = sizeof(struct ip6_hdr);
913 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
914 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
915 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
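		/*
		 * Seed th_sum with the pseudo-header checksum (length
		 * omitted); the hardware inserts the final TCP checksum
		 * for each segment.  The IPv4 case below does the same
		 * using in_pseudo().
		 */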
916 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
917 		break;
918 #endif
919 #ifdef INET
920 	case ETHERTYPE_IP:
921 		ip = (struct ip *)(mp->m_data + ehdrlen);
922 		if (ip->ip_p != IPPROTO_TCP)
923 			return (ENXIO);
924 		ip->ip_sum = 0;
925 		ip_hlen = ip->ip_hl << 2;
926 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
927 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
928 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
929 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
930 		/* Tell transmit desc to also do IPv4 checksum. */
931 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
932 		break;
933 #endif
934 	default:
935 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
936 		    __func__, ntohs(eh_type));
937 		break;
938 	}
939 
940 	ctxd = txr->next_avail_desc;
941 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
942 
943 	tcp_hlen = th->th_off << 2;
944 
945 	/* This is the TCP payload length, used in the transmit descriptor */
946 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
947 
948 	/* VLAN MACLEN IPLEN */
949 	if (mp->m_flags & M_VLANTAG) {
950 		vtag = htole16(mp->m_pkthdr.ether_vtag);
951                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
952 	}
953 
954 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
955 	vlan_macip_lens |= ip_hlen;
956 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
957 
958 	/* ADV DTYPE TUCMD */
959 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
960 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
961 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
962 
963 	/* MSS L4LEN IDX */
964 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
965 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
966 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
967 
968 	TXD->seqnum_seed = htole32(0);
969 
970 	if (++ctxd == txr->num_desc)
971 		ctxd = 0;
972 
973 	txr->tx_avail--;
974 	txr->next_avail_desc = ctxd;
975 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
976 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
977 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
978 	++txr->tso_tx;
979 	return (0);
980 }
981 
982 
983 /**********************************************************************
984  *
985  *  Examine each tx_buffer in the used queue. If the hardware is done
986  *  processing the packet then free associated resources. The
987  *  tx_buffer is put back on the free queue.
988  *
989  **********************************************************************/
990 void
991 ixgbe_txeof(struct tx_ring *txr)
992 {
993 #ifdef DEV_NETMAP
994 	struct adapter		*adapter = txr->adapter;
995 	struct ifnet		*ifp = adapter->ifp;
996 #endif
997 	u32			work, processed = 0;
998 	u16			limit = txr->process_limit;
999 	struct ixgbe_tx_buf	*buf;
1000 	union ixgbe_adv_tx_desc *txd;
1001 
1002 	mtx_assert(&txr->tx_mtx, MA_OWNED);
1003 
1004 #ifdef DEV_NETMAP
1005 	if (ifp->if_capenable & IFCAP_NETMAP) {
1006 		struct netmap_adapter *na = NA(ifp);
1007 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1008 		txd = txr->tx_base;
1009 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1010 		    BUS_DMASYNC_POSTREAD);
1011 		/*
1012 		 * In netmap mode, all the work is done in the context
1013 		 * of the client thread. Interrupt handlers only wake up
1014 		 * clients, which may be sleeping on individual rings
1015 		 * or on a global resource for all rings.
1016 		 * To implement tx interrupt mitigation, we wake up the client
1017 		 * thread roughly every half ring, even if the NIC interrupts
1018 		 * more frequently. This is implemented as follows:
1019 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1020 		 *   the slot that should wake up the thread (nkr_num_slots
1021 		 *   means the user thread should not be woken up);
1022 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1023 		 *   or the slot has the DD bit set.
1024 		 */
1025 		if (!netmap_mitigate ||
1026 		    (kring->nr_kflags < kring->nkr_num_slots &&
1027 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1028 			netmap_tx_irq(ifp, txr->me);
1029 		}
1030 		return;
1031 	}
1032 #endif /* DEV_NETMAP */
1033 
1034 	if (txr->tx_avail == txr->num_desc) {
1035 		txr->busy = 0;
1036 		return;
1037 	}
1038 
1039 	/* Get work starting point */
1040 	work = txr->next_to_clean;
1041 	buf = &txr->tx_buffers[work];
1042 	txd = &txr->tx_base[work];
1043 	work -= txr->num_desc; /* The distance to ring end */
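	/*
	 * "work" is now a negative offset from the end of the ring; it
	 * counts up toward zero, so the "!work" tests below detect
	 * exactly when the index wraps.
	 */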
1044         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1045             BUS_DMASYNC_POSTREAD);
1046 
1047 	do {
1048 		union ixgbe_adv_tx_desc *eop= buf->eop;
1049 		if (eop == NULL) /* No work */
1050 			break;
1051 
1052 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1053 			break;	/* I/O not complete */
1054 
1055 		if (buf->m_head) {
1056 			txr->bytes +=
1057 			    buf->m_head->m_pkthdr.len;
1058 			bus_dmamap_sync(txr->txtag,
1059 			    buf->map,
1060 			    BUS_DMASYNC_POSTWRITE);
1061 			bus_dmamap_unload(txr->txtag,
1062 			    buf->map);
1063 			m_freem(buf->m_head);
1064 			buf->m_head = NULL;
1065 		}
1066 		buf->eop = NULL;
1067 		++txr->tx_avail;
1068 
1069 		/* Clean the whole range if this was a multi-segment packet */
1070 		while (txd != eop) {
1071 			++txd;
1072 			++buf;
1073 			++work;
1074 			/* wrap the ring? */
1075 			if (__predict_false(!work)) {
1076 				work -= txr->num_desc;
1077 				buf = txr->tx_buffers;
1078 				txd = txr->tx_base;
1079 			}
1080 			if (buf->m_head) {
1081 				txr->bytes +=
1082 				    buf->m_head->m_pkthdr.len;
1083 				bus_dmamap_sync(txr->txtag,
1084 				    buf->map,
1085 				    BUS_DMASYNC_POSTWRITE);
1086 				bus_dmamap_unload(txr->txtag,
1087 				    buf->map);
1088 				m_freem(buf->m_head);
1089 				buf->m_head = NULL;
1090 			}
1091 			++txr->tx_avail;
1092 			buf->eop = NULL;
1093 
1094 		}
1095 		++txr->packets;
1096 		++processed;
1097 
1098 		/* Try the next packet */
1099 		++txd;
1100 		++buf;
1101 		++work;
1102 		/* reset with a wrap */
1103 		if (__predict_false(!work)) {
1104 			work -= txr->num_desc;
1105 			buf = txr->tx_buffers;
1106 			txd = txr->tx_base;
1107 		}
1108 		prefetch(txd);
1109 	} while (__predict_true(--limit));
1110 
1111 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1112 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1113 
1114 	work += txr->num_desc;
1115 	txr->next_to_clean = work;
1116 
1117 	/*
1118 	** Queue hang detection: we know there is work
1119 	** outstanding or the early return above would
1120 	** have been taken, so increment busy if nothing
1121 	** managed to get cleaned; local_timer then checks
1122 	** this and marks the queue HUNG if it exceeds a
1123 	** MAX number of attempts.
1124 	*/
1125 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1126 		++txr->busy;
1127 	/*
1128 	** If anything gets cleaned we reset the state to 1;
1129 	** note this will turn off HUNG if it is set.
1130 	*/
1131 	if (processed)
1132 		txr->busy = 1;
1133 
1134 	if (txr->tx_avail == txr->num_desc)
1135 		txr->busy = 0;
1136 
1137 	return;
1138 }
1139 
1140 
1141 #ifdef IXGBE_FDIR
1142 /*
1143 ** This routine parses packet headers so that Flow
1144 ** Director can make a hashed filter table entry
1145 ** allowing traffic flows to be identified and kept
1146 ** on the same CPU.  Doing this for every packet would
1147 ** be a performance hit, so we only sample one in every
1148 ** IXGBE_FDIR_RATE packets.
1149 */
1150 static void
1151 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1152 {
1153 	struct adapter			*adapter = txr->adapter;
1154 	struct ix_queue			*que;
1155 	struct ip			*ip;
1156 	struct tcphdr			*th;
1157 	struct udphdr			*uh;
1158 	struct ether_vlan_header	*eh;
1159 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1160 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1161 	int  				ehdrlen, ip_hlen;
1162 	u16				etype;
1163 
1164 	eh = mtod(mp, struct ether_vlan_header *);
1165 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1166 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1167 		etype = eh->evl_proto;
1168 	} else {
1169 		ehdrlen = ETHER_HDR_LEN;
1170 		etype = eh->evl_encap_proto;
1171 	}
1172 
1173 	/* Only handling IPv4 */
1174 	if (etype != htons(ETHERTYPE_IP))
1175 		return;
1176 
1177 	ip = (struct ip *)(mp->m_data + ehdrlen);
1178 	ip_hlen = ip->ip_hl << 2;
1179 
1180 	/* check if we're UDP or TCP */
1181 	switch (ip->ip_p) {
1182 	case IPPROTO_TCP:
1183 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1184 		/* src and dst are inverted */
1185 		common.port.dst ^= th->th_sport;
1186 		common.port.src ^= th->th_dport;
1187 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1188 		break;
1189 	case IPPROTO_UDP:
1190 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1191 		/* src and dst are inverted */
1192 		common.port.dst ^= uh->uh_sport;
1193 		common.port.src ^= uh->uh_dport;
1194 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1195 		break;
1196 	default:
1197 		return;
1198 	}
1199 
1200 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1201 	if (mp->m_pkthdr.ether_vtag)
1202 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1203 	else
1204 		common.flex_bytes ^= etype;
1205 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1206 
1207 	que = &adapter->queues[txr->me];
1208 	/*
1209 	** This assumes the Rx queue and Tx
1210 	** queue are bound to the same CPU
1211 	*/
1212 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1213 	    input, common, que->msix);
1214 }
1215 #endif /* IXGBE_FDIR */
1216 
1217 /*
1218 ** Used to detect a descriptor that has
1219 ** been merged by Hardware RSC.
1220 */
1221 static inline u32
1222 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1223 {
1224 	return (le32toh(rx->wb.lower.lo_dword.data) &
1225 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1226 }
1227 
1228 /*********************************************************************
1229  *
1230  *  Initialize Hardware RSC (LRO) feature on 82599
1231  *  for an RX ring; it is toggled by the LRO capability
1232  *  even though it is transparent to the stack.
1233  *
1234  *  NOTE: since this HW feature only works with IPv4 and
1235  *        our testing has shown soft LRO to be just as
1236  *        effective, I have decided to disable this by default.
1237  *
1238  **********************************************************************/
1239 static void
1240 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1241 {
1242 	struct	adapter 	*adapter = rxr->adapter;
1243 	struct	ixgbe_hw	*hw = &adapter->hw;
1244 	u32			rscctrl, rdrxctl;
1245 
1246 	/* If turning LRO/RSC off we need to disable it */
1247 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1248 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1249 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1250 		return;
1251 	}
1252 
1253 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1254 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1255 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1256 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1257 #endif /* DEV_NETMAP */
1258 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1259 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1260 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1261 
1262 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1263 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1264 	/*
1265 	** Limit the total number of descriptors that
1266 	** can be combined, so it does not exceed 64K
1267 	*/
1268 	if (rxr->mbuf_sz == MCLBYTES)
1269 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1270 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1271 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1272 	else if (rxr->mbuf_sz == MJUM9BYTES)
1273 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1274 	else  /* Using 16K cluster */
1275 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1276 
1277 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1278 
1279 	/* Enable TCP header recognition */
1280 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1281 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1282 	    IXGBE_PSRTYPE_TCPHDR));
1283 
1284 	/* Disable RSC for ACK packets */
1285 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1286 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1287 
1288 	rxr->hw_rsc = TRUE;
1289 }
1290 /*********************************************************************
1291  *
1292  *  Refresh mbuf buffers for RX descriptor rings
1293  *   - now keeps its own state so discards due to resource
1294  *     exhaustion are unnecessary: if an mbuf cannot be obtained
1295  *     it just returns, keeping its placeholder, and can simply
1296  *     be called again later to retry.
1297  *
1298  **********************************************************************/
1299 static void
1300 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1301 {
1302 	struct adapter		*adapter = rxr->adapter;
1303 	bus_dma_segment_t	seg[1];
1304 	struct ixgbe_rx_buf	*rxbuf;
1305 	struct mbuf		*mp;
1306 	int			i, j, nsegs, error;
1307 	bool			refreshed = FALSE;
1308 
1309 	i = j = rxr->next_to_refresh;
1310 	/* Control the loop with one beyond */
1311 	if (++j == rxr->num_desc)
1312 		j = 0;
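	/*
	 * 'j' runs one slot ahead of 'i', so the loop below stops one
	 * descriptor short of 'limit' and next_to_refresh never catches
	 * up with the index the caller is still processing.
	 */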
1313 
1314 	while (j != limit) {
1315 		rxbuf = &rxr->rx_buffers[i];
1316 		if (rxbuf->buf == NULL) {
1317 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1318 			    M_PKTHDR, rxr->mbuf_sz);
1319 			if (mp == NULL)
1320 				goto update;
1321 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1322 				m_adj(mp, ETHER_ALIGN);
1323 		} else
1324 			mp = rxbuf->buf;
1325 
1326 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1327 
1328 		/* If we're dealing with an mbuf that was copied rather
1329 		 * than replaced, there's no need to go through busdma.
1330 		 */
1331 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1332 			/* Get the memory mapping */
1333 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1334 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1335 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1336 			if (error != 0) {
1337 				printf("Refresh mbufs: payload dmamap load"
1338 				    " failure - %d\n", error);
1339 				m_free(mp);
1340 				rxbuf->buf = NULL;
1341 				goto update;
1342 			}
1343 			rxbuf->buf = mp;
1344 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1345 			    BUS_DMASYNC_PREREAD);
1346 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1347 			    htole64(seg[0].ds_addr);
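			/*
			 * Cache the DMA address so the IXGBE_RX_COPY
			 * path above can restore pkt_addr without
			 * reloading the map.
			 */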
1348 		} else {
1349 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1350 			rxbuf->flags &= ~IXGBE_RX_COPY;
1351 		}
1352 
1353 		refreshed = TRUE;
1354 		/* Next is precalculated */
1355 		i = j;
1356 		rxr->next_to_refresh = i;
1357 		if (++j == rxr->num_desc)
1358 			j = 0;
1359 	}
1360 update:
1361 	if (refreshed) /* Update hardware tail index */
1362 		IXGBE_WRITE_REG(&adapter->hw,
1363 		    rxr->tail, rxr->next_to_refresh);
1364 	return;
1365 }
1366 
1367 /*********************************************************************
1368  *
1369  *  Allocate memory for rx_buffer structures. Since we use one
1370  *  rx_buffer per received packet, the maximum number of rx_buffer's
1371  *  that we'll need is equal to the number of receive descriptors
1372  *  that we've allocated.
1373  *
1374  **********************************************************************/
1375 int
1376 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1377 {
1378 	struct	adapter 	*adapter = rxr->adapter;
1379 	device_t 		dev = adapter->dev;
1380 	struct ixgbe_rx_buf 	*rxbuf;
1381 	int             	bsize, error;
1382 
1383 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1384 	if (!(rxr->rx_buffers =
1385 	    (struct ixgbe_rx_buf *) malloc(bsize,
1386 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1387 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1388 		error = ENOMEM;
1389 		goto fail;
1390 	}
1391 
1392 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1393 				   1, 0,	/* alignment, bounds */
1394 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1395 				   BUS_SPACE_MAXADDR,	/* highaddr */
1396 				   NULL, NULL,		/* filter, filterarg */
1397 				   MJUM16BYTES,		/* maxsize */
1398 				   1,			/* nsegments */
1399 				   MJUM16BYTES,		/* maxsegsize */
1400 				   0,			/* flags */
1401 				   NULL,		/* lockfunc */
1402 				   NULL,		/* lockfuncarg */
1403 				   &rxr->ptag))) {
1404 		device_printf(dev, "Unable to create RX DMA tag\n");
1405 		goto fail;
1406 	}
1407 
1408 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1409 		rxbuf = &rxr->rx_buffers[i];
1410 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1411 		if (error) {
1412 			device_printf(dev, "Unable to create RX dma map\n");
1413 			goto fail;
1414 		}
1415 	}
1416 
1417 	return (0);
1418 
1419 fail:
1420 	/* Frees all, but can handle partial completion */
1421 	ixgbe_free_receive_structures(adapter);
1422 	return (error);
1423 }
1424 
1425 
1426 static void
1427 ixgbe_free_receive_ring(struct rx_ring *rxr)
1428 {
1429 	struct ixgbe_rx_buf       *rxbuf;
1430 
1431 	for (int i = 0; i < rxr->num_desc; i++) {
1432 		rxbuf = &rxr->rx_buffers[i];
1433 		if (rxbuf->buf != NULL) {
1434 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1435 			    BUS_DMASYNC_POSTREAD);
1436 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1437 			rxbuf->buf->m_flags |= M_PKTHDR;
1438 			m_freem(rxbuf->buf);
1439 			rxbuf->buf = NULL;
1440 			rxbuf->flags = 0;
1441 		}
1442 	}
1443 }
1444 
1445 
1446 /*********************************************************************
1447  *
1448  *  Initialize a receive ring and its buffers.
1449  *
1450  **********************************************************************/
1451 static int
1452 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1453 {
1454 	struct	adapter 	*adapter;
1455 	struct ifnet		*ifp;
1456 	device_t		dev;
1457 	struct ixgbe_rx_buf	*rxbuf;
1458 	bus_dma_segment_t	seg[1];
1459 	struct lro_ctrl		*lro = &rxr->lro;
1460 	int			rsize, nsegs, error = 0;
1461 #ifdef DEV_NETMAP
1462 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1463 	struct netmap_slot *slot;
1464 #endif /* DEV_NETMAP */
1465 
1466 	adapter = rxr->adapter;
1467 	ifp = adapter->ifp;
1468 	dev = adapter->dev;
1469 
1470 	/* Clear the ring contents */
1471 	IXGBE_RX_LOCK(rxr);
1472 #ifdef DEV_NETMAP
1473 	/* same as in ixgbe_setup_transmit_ring() */
1474 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1475 #endif /* DEV_NETMAP */
1476 	rsize = roundup2(adapter->num_rx_desc *
1477 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1478 	bzero((void *)rxr->rx_base, rsize);
1479 	/* Cache the size */
1480 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1481 
1482 	/* Free current RX buffer structs and their mbufs */
1483 	ixgbe_free_receive_ring(rxr);
1484 
1485 	/* Now replenish the mbufs */
1486 	for (int j = 0; j != rxr->num_desc; ++j) {
1487 		struct mbuf	*mp;
1488 
1489 		rxbuf = &rxr->rx_buffers[j];
1490 #ifdef DEV_NETMAP
1491 		/*
1492 		 * In netmap mode, fill the map and set the buffer
1493 		 * address in the NIC ring, considering the offset
1494 		 * between the netmap and NIC rings (see comment in
1495 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1496 		 * an mbuf, so end the block with a continue;
1497 		 */
1498 		if (slot) {
1499 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1500 			uint64_t paddr;
1501 			void *addr;
1502 
1503 			addr = PNMB(na, slot + sj, &paddr);
1504 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1505 			/* Update descriptor and the cached value */
1506 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1507 			rxbuf->addr = htole64(paddr);
1508 			continue;
1509 		}
1510 #endif /* DEV_NETMAP */
1511 		rxbuf->flags = 0;
1512 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1513 		    M_PKTHDR, adapter->rx_mbuf_sz);
1514 		if (rxbuf->buf == NULL) {
1515 			error = ENOBUFS;
1516                         goto fail;
1517 		}
1518 		mp = rxbuf->buf;
1519 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1520 		/* Get the memory mapping */
1521 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1522 		    rxbuf->pmap, mp, seg,
1523 		    &nsegs, BUS_DMA_NOWAIT);
1524 		if (error != 0)
1525                         goto fail;
1526 		bus_dmamap_sync(rxr->ptag,
1527 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1528 		/* Update the descriptor and the cached value */
1529 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1530 		rxbuf->addr = htole64(seg[0].ds_addr);
1531 	}
1532 
1533 
1534 	/* Setup our descriptor indices */
1535 	rxr->next_to_check = 0;
1536 	rxr->next_to_refresh = 0;
1537 	rxr->lro_enabled = FALSE;
1538 	rxr->rx_copies = 0;
1539 	rxr->rx_bytes = 0;
1540 	rxr->vtag_strip = FALSE;
1541 
1542 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1543 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1544 
1545 	/*
1546 	** Now set up the LRO interface:
1547 	*/
1548 	if (ixgbe_rsc_enable)
1549 		ixgbe_setup_hw_rsc(rxr);
1550 	else if (ifp->if_capenable & IFCAP_LRO) {
1551 		int err = tcp_lro_init(lro);
1552 		if (err) {
1553 			device_printf(dev, "LRO Initialization failed!\n");
1554 			goto fail;
1555 		}
1556 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1557 		rxr->lro_enabled = TRUE;
1558 		lro->ifp = adapter->ifp;
1559 	}
1560 
1561 	IXGBE_RX_UNLOCK(rxr);
1562 	return (0);
1563 
1564 fail:
1565 	ixgbe_free_receive_ring(rxr);
1566 	IXGBE_RX_UNLOCK(rxr);
1567 	return (error);
1568 }
1569 
1570 /*********************************************************************
1571  *
1572  *  Initialize all receive rings.
1573  *
1574  **********************************************************************/
1575 int
1576 ixgbe_setup_receive_structures(struct adapter *adapter)
1577 {
1578 	struct rx_ring *rxr = adapter->rx_rings;
1579 	int j;
1580 
1581 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1582 		if (ixgbe_setup_receive_ring(rxr))
1583 			goto fail;
1584 
1585 	return (0);
1586 fail:
1587 	/*
1588 	 * Free the RX buffers allocated so far; we only handle
1589 	 * the rings that completed, since the failing case has
1590 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1591 	 */
1592 	for (int i = 0; i < j; ++i) {
1593 		rxr = &adapter->rx_rings[i];
1594 		ixgbe_free_receive_ring(rxr);
1595 	}
1596 
1597 	return (ENOBUFS);
1598 }
1599 
1600 
1601 /*********************************************************************
1602  *
1603  *  Free all receive rings.
1604  *
1605  **********************************************************************/
1606 void
1607 ixgbe_free_receive_structures(struct adapter *adapter)
1608 {
1609 	struct rx_ring *rxr = adapter->rx_rings;
1610 
1611 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1612 
1613 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1614 		struct lro_ctrl		*lro = &rxr->lro;
1615 		ixgbe_free_receive_buffers(rxr);
1616 		/* Free LRO memory */
1617 		tcp_lro_free(lro);
1618 		/* Free the ring memory as well */
1619 		ixgbe_dma_free(adapter, &rxr->rxdma);
1620 	}
1621 
1622 	free(adapter->rx_rings, M_DEVBUF);
1623 }
1624 
1625 
1626 /*********************************************************************
1627  *
1628  *  Free receive ring data structures
1629  *
1630  **********************************************************************/
1631 void
1632 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1633 {
1634 	struct adapter		*adapter = rxr->adapter;
1635 	struct ixgbe_rx_buf	*rxbuf;
1636 
1637 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1638 
1639 	/* Cleanup any existing buffers */
1640 	if (rxr->rx_buffers != NULL) {
1641 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1642 			rxbuf = &rxr->rx_buffers[i];
1643 			if (rxbuf->buf != NULL) {
1644 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1645 				    BUS_DMASYNC_POSTREAD);
1646 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1647 				rxbuf->buf->m_flags |= M_PKTHDR;
1648 				m_freem(rxbuf->buf);
1649 			}
1650 			rxbuf->buf = NULL;
1651 			if (rxbuf->pmap != NULL) {
1652 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1653 				rxbuf->pmap = NULL;
1654 			}
1655 		}
1656 		if (rxr->rx_buffers != NULL) {
1657 			free(rxr->rx_buffers, M_DEVBUF);
1658 			rxr->rx_buffers = NULL;
1659 		}
1660 	}
1661 
1662 	if (rxr->ptag != NULL) {
1663 		bus_dma_tag_destroy(rxr->ptag);
1664 		rxr->ptag = NULL;
1665 	}
1666 
1667 	return;
1668 }
1669 
1670 static __inline void
1671 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1672 {
1673 
1674         /*
1675          * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1676          * of the packet should be computed by hardware. The packet also must not
1677          * have a VLAN tag in the ethernet header.  For IPv6 we do not yet support ext. hdrs.
1678          */
1679         if (rxr->lro_enabled &&
1680             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1681             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1682             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1683             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1684             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1685             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1686             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1687             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1688                 /*
1689                  * Send to the stack if:
1690                  *  - LRO not enabled, or
1691                  *  - no LRO resources, or
1692                  *  - lro enqueue fails
1693                  */
1694                 if (rxr->lro.lro_cnt != 0)
1695                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1696                                 return;
1697         }
1698 	IXGBE_RX_UNLOCK(rxr);
1699         (*ifp->if_input)(ifp, m);
1700 	IXGBE_RX_LOCK(rxr);
1701 }
1702 
1703 static __inline void
1704 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1705 {
1706 	struct ixgbe_rx_buf	*rbuf;
1707 
1708 	rbuf = &rxr->rx_buffers[i];
1709 
1710 
1711 	/*
1712 	** With advanced descriptors the writeback
1713 	** clobbers the buffer addrs, so it is easier
1714 	** to just free the existing mbufs and take
1715 	** the normal refresh path to get new buffers
1716 	** and mapping.
1717 	*/
1718 
1719 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1720 		rbuf->fmp->m_flags |= M_PKTHDR;
1721 		m_freem(rbuf->fmp);
1722 		rbuf->fmp = NULL;
1723 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1724 	} else if (rbuf->buf) {
1725 		m_free(rbuf->buf);
1726 		rbuf->buf = NULL;
1727 	}
1728 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1729 
1730 	rbuf->flags = 0;
1731 
1732 	return;
1733 }
1734 
1735 
1736 /*********************************************************************
1737  *
1738  *  This routine executes in interrupt context. It replenishes
1739  *  the mbufs in the descriptor ring and sends data that has been
1740  *  DMA'd into host memory up to the upper layer.
1741  *
1742  *  Return TRUE for more work, FALSE for all clean.
1743  *********************************************************************/
1744 bool
1745 ixgbe_rxeof(struct ix_queue *que)
1746 {
1747 	struct adapter		*adapter = que->adapter;
1748 	struct rx_ring		*rxr = que->rxr;
1749 	struct ifnet		*ifp = adapter->ifp;
1750 	struct lro_ctrl		*lro = &rxr->lro;
1751 	struct lro_entry	*queued;
1752 	int			i, nextp, processed = 0;
1753 	u32			staterr = 0;
1754 	u16			count = rxr->process_limit;
1755 	union ixgbe_adv_rx_desc	*cur;
1756 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1757 	u16			pkt_info;
1758 
1759 	IXGBE_RX_LOCK(rxr);
1760 
1761 #ifdef DEV_NETMAP
1762 	/* Same as the txeof routine: wakeup clients on intr. */
1763 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1764 		IXGBE_RX_UNLOCK(rxr);
1765 		return (FALSE);
1766 	}
1767 #endif /* DEV_NETMAP */
1768 
1769 	for (i = rxr->next_to_check; count != 0;) {
1770 		struct mbuf	*sendmp, *mp;
1771 		u32		rsc, ptype;
1772 		u16		len;
1773 		u16		vtag = 0;
1774 		bool		eop;
1775 
1776 		/* Sync the ring. */
1777 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1778 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1779 
1780 		cur = &rxr->rx_base[i];
1781 		staterr = le32toh(cur->wb.upper.status_error);
1782 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1783 
1784 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1785 			break;
1786 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1787 			break;
1788 
1789 		count--;
1790 		sendmp = NULL;
1791 		nbuf = NULL;
1792 		rsc = 0;
1793 		cur->wb.upper.status_error = 0;
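		/*
		 * Clear the writeback status so a stale DD bit is not
		 * mistaken for a new completion the next time this
		 * slot is examined.
		 */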
1794 		rbuf = &rxr->rx_buffers[i];
1795 		mp = rbuf->buf;
1796 
1797 		len = le16toh(cur->wb.upper.length);
1798 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1799 		    IXGBE_RXDADV_PKTTYPE_MASK;
1800 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1801 
1802 		/* Make sure bad packets are discarded */
1803 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1804 #if __FreeBSD_version >= 1100036
1805 			if (IXGBE_IS_VF(adapter))
1806 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1807 #endif
1808 			rxr->rx_discarded++;
1809 			ixgbe_rx_discard(rxr, i);
1810 			goto next_desc;
1811 		}
1812 
1813 		/*
1814 		** On the 82599, which supports a hardware
1815 		** LRO (called HW RSC), the fragments of a
1816 		** packet need not occupy sequential descriptors;
1817 		** instead, the index of the next descriptor is
1818 		** encoded in bits of the current one.
1819 		** This also means we may process more than one
1820 		** packet at a time, something that was never
1821 		** true before and which required eliminating the
1822 		** global chain pointers in favor of what we are
1823 		** doing here.  -jfv
1824 		*/
1825 		if (!eop) {
1826 			/*
1827 			** Figure out the next descriptor
1828 			** of this frame.
1829 			*/
1830 			if (rxr->hw_rsc == TRUE) {
1831 				rsc = ixgbe_rsc_count(cur);
1832 				rxr->rsc_num += (rsc - 1);
1833 			}
1834 			if (rsc) { /* Get hardware index */
1835 				nextp = ((staterr &
1836 				    IXGBE_RXDADV_NEXTP_MASK) >>
1837 				    IXGBE_RXDADV_NEXTP_SHIFT);
1838 			} else { /* Just sequential */
1839 				nextp = i + 1;
1840 				if (nextp == adapter->num_rx_desc)
1841 					nextp = 0;
1842 			}
1843 			nbuf = &rxr->rx_buffers[nextp];
1844 			prefetch(nbuf);
1845 		}
1846 		/*
1847 		** Rather than using the fmp/lmp global pointers
1848 		** we now keep the head of a packet chain in the
1849 		** buffer struct and pass this along from one
1850 		** descriptor to the next, until we get EOP.
1851 		*/
1852 		mp->m_len = len;
1853 		/*
1854 		** See if there is a stored head; if so, this
1855 		** buffer continues a frame already in progress.
1856 		*/
1857 		sendmp = rbuf->fmp;
1858 		if (sendmp != NULL) {  /* secondary frag */
1859 			rbuf->buf = rbuf->fmp = NULL;
1860 			mp->m_flags &= ~M_PKTHDR;
1861 			sendmp->m_pkthdr.len += mp->m_len;
1862 		} else {
1863 			/*
1864 			 * Optimize.  This might be a small packet,
1865 			 * maybe just a TCP ACK.  Do a fast copy that
1866 			 * is cache aligned into a new mbuf, and
1867 			 * leave the old mbuf+cluster for re-use.
1868 			 */
1869 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1870 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1871 				if (sendmp != NULL) {
1872 					sendmp->m_data +=
1873 					    IXGBE_RX_COPY_ALIGN;
1874 					ixgbe_bcopy(mp->m_data,
1875 					    sendmp->m_data, len);
1876 					sendmp->m_len = len;
1877 					rxr->rx_copies++;
1878 					rbuf->flags |= IXGBE_RX_COPY;
1879 				}
1880 			}
1881 			if (sendmp == NULL) {
1882 				rbuf->buf = rbuf->fmp = NULL;
1883 				sendmp = mp;
1884 			}
1885 
1886 			/* First descriptor of a non-packet-split chain */
1887 			sendmp->m_flags |= M_PKTHDR;
1888 			sendmp->m_pkthdr.len = mp->m_len;
1889 		}
1890 		++processed;
1891 
1892 		/* Pass the head pointer on */
1893 		if (eop == 0) {
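			/*
			 * Not the last fragment: stash the chain head in the
			 * next descriptor's buffer struct and link this mbuf
			 * to the buffer that will receive the next piece.
			 */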
1894 			nbuf->fmp = sendmp;
1895 			sendmp = NULL;
1896 			mp->m_next = nbuf->buf;
1897 		} else { /* Sending this frame */
1898 			sendmp->m_pkthdr.rcvif = ifp;
1899 			rxr->rx_packets++;
1900 			/* Capture data for adaptive interrupt moderation (AIM) */
1901 			rxr->bytes += sendmp->m_pkthdr.len;
1902 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1903 			/* Process vlan info */
1904 			if ((rxr->vtag_strip) &&
1905 			    (staterr & IXGBE_RXD_STAT_VP))
1906 				vtag = le16toh(cur->wb.upper.vlan);
1907 			if (vtag) {
1908 				sendmp->m_pkthdr.ether_vtag = vtag;
1909 				sendmp->m_flags |= M_VLANTAG;
1910 			}
1911 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1912 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1913 #if __FreeBSD_version >= 800000
1914 #ifdef RSS
1915 			sendmp->m_pkthdr.flowid =
1916 			    le32toh(cur->wb.lower.hi_dword.rss);
1917 #if __FreeBSD_version < 1100054
1918 			sendmp->m_flags |= M_FLOWID;
1919 #endif
1920 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1921 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1922 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1923 				break;
1924 			case IXGBE_RXDADV_RSSTYPE_IPV4:
1925 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1926 				break;
1927 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1929 				break;
1930 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1931 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1932 				break;
1933 			case IXGBE_RXDADV_RSSTYPE_IPV6:
1934 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1935 				break;
1936 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1937 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1938 				break;
1939 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1940 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1941 				break;
1942 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1943 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1944 				break;
1945 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1946 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1947 				break;
1948 			default:
1949 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1950 			}
1951 #else /* RSS */
1952 			sendmp->m_pkthdr.flowid = que->msix;
1953 #if __FreeBSD_version >= 1100054
1954 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1955 #else
1956 			sendmp->m_flags |= M_FLOWID;
1957 #endif
1958 #endif /* RSS */
1959 #endif /* FreeBSD_version */
1960 		}
1961 next_desc:
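		/*
		 * Make the cleared status visible in the ring before the
		 * descriptor is reused.
		 */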
1962 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1963 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1964 
1965 		/* Advance our pointers to the next descriptor. */
1966 		if (++i == rxr->num_desc)
1967 			i = 0;
1968 
1969 		/* Now send to the stack or do LRO */
1970 		if (sendmp != NULL) {
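		/*
		 * ixgbe_rx_input() drops the RX lock around if_input(), so
		 * publish next_to_check before the call and reload it after.
		 */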
1971 			rxr->next_to_check = i;
1972 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1973 			i = rxr->next_to_check;
1974 		}
1975 
1976 		/* Refresh mbufs every eight processed descriptors */
1977 		if (processed == 8) {
1978 			ixgbe_refresh_mbufs(rxr, i);
1979 			processed = 0;
1980 		}
1981 	}
1982 
1983 	/* Refresh any remaining buf structs */
1984 	if (ixgbe_rx_unrefreshed(rxr))
1985 		ixgbe_refresh_mbufs(rxr, i);
1986 
1987 	rxr->next_to_check = i;
1988 
1989 	/*
1990 	 * Flush any outstanding LRO work
1991 	 */
1992 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1993 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1994 		tcp_lro_flush(lro, queued);
1995 	}
1996 
1997 	IXGBE_RX_UNLOCK(rxr);
1998 
1999 	/*
2000 	** Still have cleaning to do?
2001 	*/
2002 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2003 		return (TRUE);
2004 	else
2005 		return (FALSE);
2006 }
2007 
2008 
2009 /*********************************************************************
2010  *
2011  *  Verify that the hardware indicated that the checksum is valid.
2012  *  Inform the stack of the checksum status so that it does
2013  *  not spend time verifying the checksum again.
2014  *
2015  *********************************************************************/
2016 static void
2017 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2018 {
2019 	u16	status = (u16) staterr;
2020 	u8	errors = (u8) (staterr >> 24);
2021 	bool	sctp = FALSE;
2022 
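	/*
	 * When the ETQF bit is set the packet type field identifies the
	 * matching EtherType filter rather than the protocol, so the SCTP
	 * bit is only meaningful when ETQF is clear.
	 */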
2023 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2024 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2025 		sctp = TRUE;
2026 
2027 	if (status & IXGBE_RXD_STAT_IPCS) {
2028 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2029 			/* IP Checksum Good */
2030 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2031 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2032 
2033 		} else
2034 			mp->m_pkthdr.csum_flags = 0;
2035 	}
2036 	if (status & IXGBE_RXD_STAT_L4CS) {
2037 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2038 #if __FreeBSD_version >= 800000
2039 		if (sctp)
2040 			type = CSUM_SCTP_VALID;
2041 #endif
2042 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
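			/*
			 * With CSUM_PSEUDO_HDR set, a csum_data of 0xffff
			 * tells the stack the L4 checksum fully verified.
			 */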
2043 			mp->m_pkthdr.csum_flags |= type;
2044 			if (!sctp)
2045 				mp->m_pkthdr.csum_data = htons(0xffff);
2046 		}
2047 	}
2048 	return;
2049 }
2050 
2051 /********************************************************************
2052  * Manage DMA'able memory.
2053  *******************************************************************/
2054 static void
2055 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2056 {
2057 	if (error)
2058 		return;
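	/* Callers create the tag with nsegments == 1, so the first segment
	 * covers the whole allocation. */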
2059 	*(bus_addr_t *) arg = segs->ds_addr;
2060 	return;
2061 }
2062 
2063 int
2064 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2065 		struct ixgbe_dma_alloc *dma, int mapflags)
2066 {
2067 	device_t dev = adapter->dev;
2068 	int             r;
2069 
2070 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2071 			       DBA_ALIGN, 0,	/* alignment, bounds */
2072 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2073 			       BUS_SPACE_MAXADDR,	/* highaddr */
2074 			       NULL, NULL,	/* filter, filterarg */
2075 			       size,	/* maxsize */
2076 			       1,	/* nsegments */
2077 			       size,	/* maxsegsize */
2078 			       BUS_DMA_ALLOCNOW,	/* flags */
2079 			       NULL,	/* lockfunc */
2080 			       NULL,	/* lockfuncarg */
2081 			       &dma->dma_tag);
2082 	if (r != 0) {
2083 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2084 		       "error %u\n", r);
2085 		goto fail_0;
2086 	}
2087 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2088 			     BUS_DMA_NOWAIT, &dma->dma_map);
2089 	if (r != 0) {
2090 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2091 		       "error %u\n", r);
2092 		goto fail_1;
2093 	}
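	/*
	 * With BUS_DMA_NOWAIT the load either completes immediately (the
	 * callback above has filled dma_paddr) or fails; it is not deferred.
	 */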
2094 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2095 			    size,
2096 			    ixgbe_dmamap_cb,
2097 			    &dma->dma_paddr,
2098 			    mapflags | BUS_DMA_NOWAIT);
2099 	if (r != 0) {
2100 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2101 		       "error %u\n", r);
2102 		goto fail_2;
2103 	}
2104 	dma->dma_size = size;
2105 	return (0);
2106 fail_2:
2107 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2108 fail_1:
2109 	bus_dma_tag_destroy(dma->dma_tag);
2110 fail_0:
2111 	dma->dma_tag = NULL;
2112 	return (r);
2113 }
2114 
2115 void
2116 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2117 {
2118 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2119 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2120 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2121 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2122 	bus_dma_tag_destroy(dma->dma_tag);
2123 }
2124 
2125 
2126 /*********************************************************************
2127  *
2128  *  Allocate memory for the transmit and receive rings, and then
2129  *  the descriptors associated with each; called only once at attach.
2130  *
2131  **********************************************************************/
2132 int
2133 ixgbe_allocate_queues(struct adapter *adapter)
2134 {
2135 	device_t	dev = adapter->dev;
2136 	struct ix_queue	*que;
2137 	struct tx_ring	*txr;
2138 	struct rx_ring	*rxr;
2139 	int rsize, tsize, error = IXGBE_SUCCESS;
2140 	int txconf = 0, rxconf = 0;
2141 #ifdef PCI_IOV
2142 	enum ixgbe_iov_mode iov_mode;
2143 #endif
2144 
2145 	/* First allocate the top level queue structs */
2146 	if (!(adapter->queues =
2147 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2148 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2149 		device_printf(dev, "Unable to allocate queue memory\n");
2150 		error = ENOMEM;
2151 		goto fail;
2152 	}
2153 
2154 	/* Next allocate the TX ring struct memory */
2155 	if (!(adapter->tx_rings =
2156 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2157 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2158 		device_printf(dev, "Unable to allocate TX ring memory\n");
2159 		error = ENOMEM;
2160 		goto tx_fail;
2161 	}
2162 
2163 	/* Next allocate the RX */
2164 	if (!(adapter->rx_rings =
2165 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2166 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2167 		device_printf(dev, "Unable to allocate RX ring memory\n");
2168 		error = ENOMEM;
2169 		goto rx_fail;
2170 	}
2171 
2172 	/* Size of the TX descriptor ring, rounded up to the DBA alignment */
2173 	tsize = roundup2(adapter->num_tx_desc *
2174 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2175 
2176 #ifdef PCI_IOV
2177 	iov_mode = ixgbe_get_iov_mode(adapter);
2178 	adapter->pool = ixgbe_max_vfs(iov_mode);
2179 #else
2180 	adapter->pool = 0;
2181 #endif
2182 	/*
2183 	 * Now set up the TX queues.  txconf tracks how far we get so
2184 	 * that, if anything fails midway, the memory allocated so far
2185 	 * can be released gracefully.
2186 	 */
2187 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2188 		/* Set up some basics */
2189 		txr = &adapter->tx_rings[i];
2190 		txr->adapter = adapter;
2191 #ifdef PCI_IOV
2192 		txr->me = ixgbe_pf_que_index(iov_mode, i);
2193 #else
2194 		txr->me = i;
2195 #endif
2196 		txr->num_desc = adapter->num_tx_desc;
2197 
2198 		/* Initialize the TX side lock */
2199 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2200 		    device_get_nameunit(dev), txr->me);
2201 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2202 
2203 		if (ixgbe_dma_malloc(adapter, tsize,
2204 			&txr->txdma, BUS_DMA_NOWAIT)) {
2205 			device_printf(dev,
2206 			    "Unable to allocate TX Descriptor memory\n");
2207 			error = ENOMEM;
2208 			goto err_tx_desc;
2209 		}
2210 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2211 		bzero((void *)txr->tx_base, tsize);
2212 
2213 		/* Now allocate transmit buffers for the ring */
2214 		if (ixgbe_allocate_transmit_buffers(txr)) {
2215 			device_printf(dev,
2216 			    "Critical Failure setting up transmit buffers\n");
2217 			error = ENOMEM;
2218 			goto err_tx_desc;
2219 		}
2220 #ifndef IXGBE_LEGACY_TX
2221 		/* Allocate a buf ring */
2222 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2223 		    M_WAITOK, &txr->tx_mtx);
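		/*
		 * buf_ring_alloc() with M_WAITOK sleeps rather than failing,
		 * so the NULL check below is purely defensive.
		 */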
2224 		if (txr->br == NULL) {
2225 			device_printf(dev,
2226 			    "Critical Failure setting up buf ring\n");
2227 			error = ENOMEM;
2228 			goto err_tx_desc;
2229 		}
2230 #endif
2231 	}
2232 
2233 	/*
2234 	 * Next the RX queues...
2235 	 */
2236 	rsize = roundup2(adapter->num_rx_desc *
2237 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2238 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2239 		rxr = &adapter->rx_rings[i];
2240 		/* Set up some basics */
2241 		rxr->adapter = adapter;
2242 #ifdef PCI_IOV
2243 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
2244 #else
2245 		rxr->me = i;
2246 #endif
2247 		rxr->num_desc = adapter->num_rx_desc;
2248 
2249 		/* Initialize the RX side lock */
2250 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2251 		    device_get_nameunit(dev), rxr->me);
2252 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2253 
2254 		if (ixgbe_dma_malloc(adapter, rsize,
2255 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2256 			device_printf(dev,
2257 			    "Unable to allocate RX Descriptor memory\n");
2258 			error = ENOMEM;
2259 			goto err_rx_desc;
2260 		}
2261 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2262 		bzero((void *)rxr->rx_base, rsize);
2263 
2264 		/* Allocate receive buffers for the ring */
2265 		if (ixgbe_allocate_receive_buffers(rxr)) {
2266 			device_printf(dev,
2267 			    "Critical Failure setting up receive buffers\n");
2268 			error = ENOMEM;
2269 			goto err_rx_desc;
2270 		}
2271 	}
2272 
2273 	/*
2274 	** Finally set up the queue holding structs
2275 	*/
2276 	for (int i = 0; i < adapter->num_queues; i++) {
2277 		que = &adapter->queues[i];
2278 		que->adapter = adapter;
2279 		que->me = i;
2280 		que->txr = &adapter->tx_rings[i];
2281 		que->rxr = &adapter->rx_rings[i];
2282 	}
2283 
2284 	return (0);
2285 
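/*
 * Error unwind: release in reverse order of allocation; rxconf and
 * txconf record how many rings had descriptor memory allocated.
 */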
2286 err_rx_desc:
2287 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2288 		ixgbe_dma_free(adapter, &rxr->rxdma);
2289 err_tx_desc:
2290 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2291 		ixgbe_dma_free(adapter, &txr->txdma);
2292 	free(adapter->rx_rings, M_DEVBUF);
2293 rx_fail:
2294 	free(adapter->tx_rings, M_DEVBUF);
2295 tx_fail:
2296 	free(adapter->queues, M_DEVBUF);
2297 fail:
2298 	return (error);
2299 }
2300