xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision 49b49cda41feabe3439f7318e8bf40e3896c7bf4)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 #ifdef	RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48 
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 extern int ix_crcstrip;
55 #endif
56 
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also breaks IP forwarding, and unlike
62 **  software LRO it cannot be controlled by
63 **  the stack. For all these reasons it is
64 **  left off by default, with no tunable
65 **  interface; enabling it requires changing
66 **  the value below and recompiling the
67 **  driver.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70 
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** sampling rate for TX packets fed to
75 ** the filter pool; by default every
76 ** 20th packet is probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
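/*
** The rate takes effect in ixgbe_setup_transmit_ring(), which copies it
** into txr->atr_sample, and in ixgbe_xmit(), which invokes ixgbe_atr()
** once txr->atr_count reaches atr_sample_rate. Setting it to 0 leaves
** txr->atr_sample at 0, so the sampling block in ixgbe_xmit() is
** skipped entirely.
*/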
82 #endif
83 
84 /*********************************************************************
85  *  Local Function prototypes
86  *********************************************************************/
87 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int	ixgbe_setup_receive_ring(struct rx_ring *);
90 static void     ixgbe_free_receive_buffers(struct rx_ring *);
91 
92 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
96 		    struct mbuf *, u32 *, u32 *);
97 static int	ixgbe_tso_setup(struct tx_ring *,
98 		    struct mbuf *, u32 *, u32 *);
99 #ifdef IXGBE_FDIR
100 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
101 #endif
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
104 		    struct mbuf *, u32);
105 
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108  *  Transmit entry point
109  *
110  *  ixgbe_start is called by the stack to initiate a transmit.
111  *  The driver will remain in this routine as long as there are
112  *  packets to transmit and transmit resources are available.
113  *  If resources are not available, the stack is notified
114  *  and the packet is requeued.
115  **********************************************************************/
116 
117 void
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
119 {
120 	struct mbuf    *m_head;
121 	struct adapter *adapter = txr->adapter;
122 
123 	IXGBE_TX_LOCK_ASSERT(txr);
124 
125 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
126 		return;
127 	if (!adapter->link_active)
128 		return;
129 
130 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
132 			break;
133 
134 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
135 		if (m_head == NULL)
136 			break;
137 
138 		if (ixgbe_xmit(txr, &m_head)) {
139 			if (m_head != NULL)
140 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
141 			break;
142 		}
143 		/* Send a copy of the frame to the BPF listener */
144 		ETHER_BPF_MTAP(ifp, m_head);
145 	}
146 	return;
147 }
148 
149 /*
150  * Legacy TX start - called by the stack, this
151  * always uses the first tx ring, and should
152  * not be used with multiqueue tx enabled.
153  */
154 void
155 ixgbe_start(struct ifnet *ifp)
156 {
157 	struct adapter *adapter = ifp->if_softc;
158 	struct tx_ring	*txr = adapter->tx_rings;
159 
160 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
161 		IXGBE_TX_LOCK(txr);
162 		ixgbe_start_locked(txr, ifp);
163 		IXGBE_TX_UNLOCK(txr);
164 	}
165 	return;
166 }
167 
168 #else /* ! IXGBE_LEGACY_TX */
169 
170 /*
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
173 */
174 int
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
176 {
177 	struct adapter	*adapter = ifp->if_softc;
178 	struct ix_queue	*que;
179 	struct tx_ring	*txr;
180 	int 		i, err = 0;
181 #ifdef	RSS
182 	uint32_t bucket_id;
183 #endif
184 
185 	/*
186 	 * When doing RSS, map it to the same outbound queue
187 	 * as the incoming flow would be mapped to.
188 	 *
189 	 * If everything is set up correctly, this should be
190 	 * the same bucket the current CPU is assigned to.
191 	 */
192 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
193 #ifdef	RSS
194 		if (rss_hash2bucket(m->m_pkthdr.flowid,
195 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
196 			i = bucket_id % adapter->num_queues;
197 #ifdef IXGBE_DEBUG
198 			if (bucket_id >= adapter->num_queues)
199 				if_printf(ifp, "bucket_id (%d) >= num_queues "
200 				    "(%d)\n", bucket_id, adapter->num_queues);
201 #endif
202 		} else
203 #endif
204 			i = m->m_pkthdr.flowid % adapter->num_queues;
205 	} else
206 		i = curcpu % adapter->num_queues;
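	/*
	 * Example: with num_queues = 8, a flow whose RSS bucket (or raw
	 * flowid) is 11 maps to queue 11 % 8 = 3; traffic without a flow
	 * hash simply follows the CPU the transmit was issued from.
	 */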
207 
208 	/* Check for a hung queue and pick alternative */
209 	if (((1 << i) & adapter->active_queues) == 0)
210 		i = ffsl(adapter->active_queues);
211 
212 	txr = &adapter->tx_rings[i];
213 	que = &adapter->queues[i];
214 
215 	err = drbr_enqueue(ifp, txr->br, m);
216 	if (err)
217 		return (err);
218 	if (IXGBE_TX_TRYLOCK(txr)) {
219 		ixgbe_mq_start_locked(ifp, txr);
220 		IXGBE_TX_UNLOCK(txr);
221 	} else
222 		taskqueue_enqueue(que->tq, &txr->txq_task);
223 
224 	return (0);
225 }
226 
227 int
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
229 {
230 	struct adapter  *adapter = txr->adapter;
231         struct mbuf     *next;
232         int             enqueued = 0, err = 0;
233 
234 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235 	    adapter->link_active == 0)
236 		return (ENETDOWN);
237 
238 	/* Process the queue */
239 #if __FreeBSD_version < 901504
240 	next = drbr_dequeue(ifp, txr->br);
241 	while (next != NULL) {
242 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
243 			if (next != NULL)
244 				err = drbr_enqueue(ifp, txr->br, next);
245 #else
246 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
248 			if (next == NULL) {
249 				drbr_advance(ifp, txr->br);
250 			} else {
251 				drbr_putback(ifp, txr->br, next);
252 			}
253 #endif
254 			break;
255 		}
256 #if __FreeBSD_version >= 901504
257 		drbr_advance(ifp, txr->br);
258 #endif
259 		enqueued++;
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
262 		/*
263 		 * Since we're looking at the tx ring, we can check
264 		 * to see if we're a VF by examining our tail register
265 		 * address.
266 		 */
267 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
269 #endif
270 #endif
271 		/* Send a copy of the frame to the BPF listener */
272 		ETHER_BPF_MTAP(ifp, next);
273 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
274 			break;
275 #if __FreeBSD_version < 901504
276 		next = drbr_dequeue(ifp, txr->br);
277 #endif
278 	}
279 
280 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
281 		ixgbe_txeof(txr);
282 
283 	return (err);
284 }
285 
286 /*
287  * Called from a taskqueue to drain queued transmit packets.
288  */
289 void
290 ixgbe_deferred_mq_start(void *arg, int pending)
291 {
292 	struct tx_ring *txr = arg;
293 	struct adapter *adapter = txr->adapter;
294 	struct ifnet *ifp = adapter->ifp;
295 
296 	IXGBE_TX_LOCK(txr);
297 	if (!drbr_empty(ifp, txr->br))
298 		ixgbe_mq_start_locked(ifp, txr);
299 	IXGBE_TX_UNLOCK(txr);
300 }
301 
302 /*
303  * Flush all ring buffers
304  */
305 void
306 ixgbe_qflush(struct ifnet *ifp)
307 {
308 	struct adapter	*adapter = ifp->if_softc;
309 	struct tx_ring	*txr = adapter->tx_rings;
310 	struct mbuf	*m;
311 
312 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
313 		IXGBE_TX_LOCK(txr);
314 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
315 			m_freem(m);
316 		IXGBE_TX_UNLOCK(txr);
317 	}
318 	if_qflush(ifp);
319 }
320 #endif /* IXGBE_LEGACY_TX */
321 
322 
323 /*********************************************************************
324  *
325  *  This routine maps the mbufs to tx descriptors, allowing the
326  *  TX engine to transmit the packets.
327  *  	- return 0 on success, positive on failure
328  *
329  **********************************************************************/
330 
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
333 {
334 	struct adapter  *adapter = txr->adapter;
335 	u32		olinfo_status = 0, cmd_type_len;
336 	int             i, j, error, nsegs;
337 	int		first;
338 	bool		remap = TRUE;
339 	struct mbuf	*m_head;
340 	bus_dma_segment_t segs[adapter->num_segs];
341 	bus_dmamap_t	map;
342 	struct ixgbe_tx_buf *txbuf;
343 	union ixgbe_adv_tx_desc *txd = NULL;
344 
345 	m_head = *m_headp;
346 
347 	/* Basic descriptor defines */
348         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
350 
351 	if (m_head->m_flags & M_VLANTAG)
352         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
353 
354         /*
355          * It is important to capture the first descriptor
356          * used because its tx_buffer will later point at
357          * the EOP descriptor the hardware reports back on.
358          */
359         first = txr->next_avail_desc;
360 	txbuf = &txr->tx_buffers[first];
361 	map = txbuf->map;
362 
363 	/*
364 	 * Map the packet for DMA.
365 	 */
366 retry:
367 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
369 
370 	if (__predict_false(error)) {
371 		struct mbuf *m;
372 
373 		switch (error) {
374 		case EFBIG:
375 			/* Try it again? - one try */
376 			if (remap == TRUE) {
377 				remap = FALSE;
378 				/*
379 				 * XXX: m_defrag will choke on
380 				 * non-MCLBYTES-sized clusters
381 				 */
382 				m = m_defrag(*m_headp, M_NOWAIT);
383 				if (m == NULL) {
384 					adapter->mbuf_defrag_failed++;
385 					m_freem(*m_headp);
386 					*m_headp = NULL;
387 					return (ENOBUFS);
388 				}
389 				*m_headp = m;
390 				goto retry;
391 			} else
392 				return (error);
393 		case ENOMEM:
394 			txr->no_tx_dma_setup++;
395 			return (error);
396 		default:
397 			txr->no_tx_dma_setup++;
398 			m_freem(*m_headp);
399 			*m_headp = NULL;
400 			return (error);
401 		}
402 	}
403 
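	/*
	 * The "- 2" in the check below leaves headroom beyond the data
	 * segments, presumably for the offload context descriptor consumed
	 * by ixgbe_tx_ctx_setup()/ixgbe_tso_setup() and to keep the ring
	 * from ever filling completely.
	 */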
404 	/* Make certain there are enough descriptors */
405 	if (nsegs > txr->tx_avail - 2) {
406 		txr->no_desc_avail++;
407 		bus_dmamap_unload(txr->txtag, map);
408 		return (ENOBUFS);
409 	}
410 	m_head = *m_headp;
411 
412 	/*
413 	 * Set up the appropriate offload context;
414 	 * this will consume the first descriptor.
415 	 */
416 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417 	if (__predict_false(error)) {
418 		if (error == ENOBUFS)
419 			*m_headp = NULL;
420 		return (error);
421 	}
422 
423 #ifdef IXGBE_FDIR
424 	/* Do the flow director magic */
425 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
426 		++txr->atr_count;
427 		if (txr->atr_count >= atr_sample_rate) {
428 			ixgbe_atr(txr, m_head);
429 			txr->atr_count = 0;
430 		}
431 	}
432 #endif
433 
434 	olinfo_status |= IXGBE_ADVTXD_CC;
435 	i = txr->next_avail_desc;
436 	for (j = 0; j < nsegs; j++) {
437 		bus_size_t seglen;
438 		bus_addr_t segaddr;
439 
440 		txbuf = &txr->tx_buffers[i];
441 		txd = &txr->tx_base[i];
442 		seglen = segs[j].ds_len;
443 		segaddr = htole64(segs[j].ds_addr);
444 
445 		txd->read.buffer_addr = segaddr;
446 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 		    cmd_type_len | seglen);
448 		txd->read.olinfo_status = htole32(olinfo_status);
449 
450 		if (++i == txr->num_desc)
451 			i = 0;
452 	}
453 
454 	txd->read.cmd_type_len |=
455 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 	txr->tx_avail -= nsegs;
457 	txr->next_avail_desc = i;
458 
459 	txbuf->m_head = m_head;
460 	/*
461 	 * Here we swap the map so the last descriptor,
462 	 * which gets the completion interrupt, has the
463 	 * real map, and the first descriptor gets the
464 	 * unused map from this descriptor.
465 	 */
466 	txr->tx_buffers[first].map = txbuf->map;
467 	txbuf->map = map;
468 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469 
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472 	txbuf->eop = txd;
473 
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476 	/*
477 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
478 	 * hardware that this frame is available to transmit.
479 	 */
480 	++txr->total_packets;
481 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482 
483 	/* Mark queue as having work */
484 	if (txr->busy == 0)
485 		txr->busy = 1;
486 
487 	return (0);
488 }
489 
490 
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach; setup is done on every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501 	struct adapter *adapter = txr->adapter;
502 	device_t dev = adapter->dev;
503 	struct ixgbe_tx_buf *txbuf;
504 	int error, i;
505 
506 	/*
507 	 * Setup DMA descriptor areas.
508 	 */
509 	if ((error = bus_dma_tag_create(
510 			       bus_get_dma_tag(adapter->dev),	/* parent */
511 			       1, 0,		/* alignment, bounds */
512 			       BUS_SPACE_MAXADDR,	/* lowaddr */
513 			       BUS_SPACE_MAXADDR,	/* highaddr */
514 			       NULL, NULL,		/* filter, filterarg */
515 			       IXGBE_TSO_SIZE,		/* maxsize */
516 			       adapter->num_segs,	/* nsegments */
517 			       PAGE_SIZE,		/* maxsegsize */
518 			       0,			/* flags */
519 			       NULL,			/* lockfunc */
520 			       NULL,			/* lockfuncarg */
521 			       &txr->txtag))) {
522 		device_printf(dev,"Unable to allocate TX DMA tag\n");
523 		goto fail;
524 	}
525 
526 	if (!(txr->tx_buffers =
527 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
530 		error = ENOMEM;
531 		goto fail;
532 	}
533 
534         /* Create the descriptor buffer dma maps */
535 	txbuf = txr->tx_buffers;
536 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538 		if (error != 0) {
539 			device_printf(dev, "Unable to create TX DMA map\n");
540 			goto fail;
541 		}
542 	}
543 
544 	return 0;
545 fail:
546 	/* We free all; this handles the case where we failed partway through */
547 	ixgbe_free_transmit_structures(adapter);
548 	return (error);
549 }
550 
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559 	struct adapter *adapter = txr->adapter;
560 	struct ixgbe_tx_buf *txbuf;
561 #ifdef DEV_NETMAP
562 	struct netmap_adapter *na = NA(adapter->ifp);
563 	struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
565 
566 	/* Clear the old ring contents */
567 	IXGBE_TX_LOCK(txr);
568 #ifdef DEV_NETMAP
569 	/*
570 	 * (under lock): if in netmap mode, do some consistency
571 	 * checks and set slot to entry 0 of the netmap ring.
572 	 */
573 	slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 	bzero((void *)txr->tx_base,
576 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
577 	/* Reset indices */
578 	txr->next_avail_desc = 0;
579 	txr->next_to_clean = 0;
580 
581 	/* Free any existing tx buffers. */
582         txbuf = txr->tx_buffers;
583 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 		if (txbuf->m_head != NULL) {
585 			bus_dmamap_sync(txr->txtag, txbuf->map,
586 			    BUS_DMASYNC_POSTWRITE);
587 			bus_dmamap_unload(txr->txtag, txbuf->map);
588 			m_freem(txbuf->m_head);
589 			txbuf->m_head = NULL;
590 		}
591 #ifdef DEV_NETMAP
592 		/*
593 		 * In netmap mode, set the map for the packet buffer.
594 		 * NOTE: Some drivers (not this one) also need to set
595 		 * the physical buffer address in the NIC ring.
596 		 * Slots in the netmap ring (indexed by "si") are
597 		 * kring->nkr_hwofs positions "ahead" wrt the
598 		 * corresponding slot in the NIC ring. In some drivers
599 		 * (not here) nkr_hwofs can be negative. Function
600 		 * netmap_idx_n2k() handles wraparounds properly.
601 		 */
602 		if (slot) {
603 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 			netmap_load_map(na, txr->txtag,
605 			    txbuf->map, NMB(na, slot + si));
606 		}
607 #endif /* DEV_NETMAP */
608 		/* Clear the EOP descriptor pointer */
609 		txbuf->eop = NULL;
610         }
611 
612 #ifdef IXGBE_FDIR
613 	/* Set the rate at which we sample packets */
614 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 		txr->atr_sample = atr_sample_rate;
616 #endif
617 
618 	/* Set number of descriptors available */
619 	txr->tx_avail = adapter->num_tx_desc;
620 
621 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 	IXGBE_TX_UNLOCK(txr);
624 }
625 
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634 	struct tx_ring *txr = adapter->tx_rings;
635 
636 	for (int i = 0; i < adapter->num_queues; i++, txr++)
637 		ixgbe_setup_transmit_ring(txr);
638 
639 	return (0);
640 }
641 
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650 	struct tx_ring *txr = adapter->tx_rings;
651 
652 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
653 		IXGBE_TX_LOCK(txr);
654 		ixgbe_free_transmit_buffers(txr);
655 		ixgbe_dma_free(adapter, &txr->txdma);
656 		IXGBE_TX_UNLOCK(txr);
657 		IXGBE_TX_LOCK_DESTROY(txr);
658 	}
659 	free(adapter->tx_rings, M_DEVBUF);
660 }
661 
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670 	struct adapter *adapter = txr->adapter;
671 	struct ixgbe_tx_buf *tx_buffer;
672 	int             i;
673 
674 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675 
676 	if (txr->tx_buffers == NULL)
677 		return;
678 
679 	tx_buffer = txr->tx_buffers;
680 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 		if (tx_buffer->m_head != NULL) {
682 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 			    BUS_DMASYNC_POSTWRITE);
684 			bus_dmamap_unload(txr->txtag,
685 			    tx_buffer->map);
686 			m_freem(tx_buffer->m_head);
687 			tx_buffer->m_head = NULL;
688 			if (tx_buffer->map != NULL) {
689 				bus_dmamap_destroy(txr->txtag,
690 				    tx_buffer->map);
691 				tx_buffer->map = NULL;
692 			}
693 		} else if (tx_buffer->map != NULL) {
694 			bus_dmamap_unload(txr->txtag,
695 			    tx_buffer->map);
696 			bus_dmamap_destroy(txr->txtag,
697 			    tx_buffer->map);
698 			tx_buffer->map = NULL;
699 		}
700 	}
701 #ifdef IXGBE_LEGACY_TX
702 	if (txr->br != NULL)
703 		buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705 	if (txr->tx_buffers != NULL) {
706 		free(txr->tx_buffers, M_DEVBUF);
707 		txr->tx_buffers = NULL;
708 	}
709 	if (txr->txtag != NULL) {
710 		bus_dma_tag_destroy(txr->txtag);
711 		txr->txtag = NULL;
712 	}
713 	return;
714 }
715 
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721 
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726 	struct adapter *adapter = txr->adapter;
727 	struct ixgbe_adv_tx_context_desc *TXD;
728 	struct ether_vlan_header *eh;
729 #ifdef INET
730 	struct ip *ip;
731 #endif
732 #ifdef INET6
733 	struct ip6_hdr *ip6;
734 #endif
735 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736 	int	ehdrlen, ip_hlen = 0;
737 	u16	etype;
738 	u8	ipproto = 0;
739 	int	offload = TRUE;
740 	int	ctxd = txr->next_avail_desc;
741 	u16	vtag = 0;
742 	caddr_t l3d;
743 
744 
745 	/* First check if TSO is to be used */
746 	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
748 
749 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
750 		offload = FALSE;
751 
752 	/* Indicate the whole packet as payload when not doing TSO */
753        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
754 
755 	/* Now ready a context descriptor */
756 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
757 
758 	/*
759 	** In advanced descriptors the vlan tag must
760 	** be placed into the context descriptor. Hence
761 	** we need to make one even if not doing offloads.
762 	*/
763 	if (mp->m_flags & M_VLANTAG) {
764 		vtag = htole16(mp->m_pkthdr.ether_vtag);
765 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
767 		return (0);
768 
769 	/*
770 	 * Determine where frame payload starts.
771 	 * Jump over vlan headers if already present,
772 	 * helpful for QinQ too.
773 	 */
774 	eh = mtod(mp, struct ether_vlan_header *);
775 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776 		etype = ntohs(eh->evl_proto);
777 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
778 	} else {
779 		etype = ntohs(eh->evl_encap_proto);
780 		ehdrlen = ETHER_HDR_LEN;
781 	}
782 
783 	/* Set the ether header length */
784 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
785 
786 	if (offload == FALSE)
787 		goto no_offloads;
788 
789 	/*
790 	 * If the first mbuf only includes the ethernet header, jump to the next one
791 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792 	 * XXX: And assumes the entire IP header is contained in one mbuf
793 	 */
794 	if (mp->m_len == ehdrlen && mp->m_next)
795 		l3d = mtod(mp->m_next, caddr_t);
796 	else
797 		l3d = mtod(mp, caddr_t) + ehdrlen;
798 
799 	switch (etype) {
800 #ifdef INET
801 		case ETHERTYPE_IP:
802 			ip = (struct ip *)(l3d);
803 			ip_hlen = ip->ip_hl << 2;
804 			ipproto = ip->ip_p;
805 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
806 			/* Insert IPv4 checksum into data descriptors */
807 			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
808 				ip->ip_sum = 0;
809 				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
810 			}
811 			break;
812 #endif
813 #ifdef INET6
814 		case ETHERTYPE_IPV6:
815 			ip6 = (struct ip6_hdr *)(l3d);
816 			ip_hlen = sizeof(struct ip6_hdr);
817 			ipproto = ip6->ip6_nxt;
818 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
819 			break;
820 #endif
821 		default:
822 			offload = FALSE;
823 			break;
824 	}
825 
826 	vlan_macip_lens |= ip_hlen;
827 
828 	/* No support for offloads for non-L4 next headers */
829 	switch (ipproto) {
830 		case IPPROTO_TCP:
831 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
832 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
833 			else
834 				offload = false;
835 			break;
836 		case IPPROTO_UDP:
837 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
838 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
839 			else
840 				offload = false;
841 			break;
842 		case IPPROTO_SCTP:
843 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
844 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
845 			else
846 				offload = false;
847 			break;
848 		default:
849 			offload = false;
850 			break;
851 	}
852 
853 	if (offload) /* Insert L4 checksum into data descriptors */
854 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
855 
856 no_offloads:
857 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
858 
859 	/* Now copy bits into descriptor */
860 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
861 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
862 	TXD->seqnum_seed = htole32(0);
863 	TXD->mss_l4len_idx = htole32(0);
864 
865 	/* We've consumed the first desc, adjust counters */
866 	if (++ctxd == txr->num_desc)
867 		ctxd = 0;
868 	txr->next_avail_desc = ctxd;
869 	--txr->tx_avail;
870 
871         return (0);
872 }
873 
874 /**********************************************************************
875  *
876  *  Setup work for hardware segmentation offload (TSO) on
877  *  adapters using advanced tx descriptors
878  *
879  **********************************************************************/
880 static int
881 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
882     u32 *cmd_type_len, u32 *olinfo_status)
883 {
884 	struct ixgbe_adv_tx_context_desc *TXD;
885 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
886 	u32 mss_l4len_idx = 0, paylen;
887 	u16 vtag = 0, eh_type;
888 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
889 	struct ether_vlan_header *eh;
890 #ifdef INET6
891 	struct ip6_hdr *ip6;
892 #endif
893 #ifdef INET
894 	struct ip *ip;
895 #endif
896 	struct tcphdr *th;
897 
898 	/*
899 	 * Determine where frame payload starts.
900 	 * Jump over vlan headers if already present
901 	 */
902 	eh = mtod(mp, struct ether_vlan_header *);
903 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
904 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
905 		eh_type = eh->evl_proto;
906 	} else {
907 		ehdrlen = ETHER_HDR_LEN;
908 		eh_type = eh->evl_encap_proto;
909 	}
910 
911 	switch (ntohs(eh_type)) {
912 #ifdef INET6
913 	case ETHERTYPE_IPV6:
914 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
916 		if (ip6->ip6_nxt != IPPROTO_TCP)
917 			return (ENXIO);
918 		ip_hlen = sizeof(struct ip6_hdr);
919 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
921 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
922 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
923 		break;
924 #endif
925 #ifdef INET
926 	case ETHERTYPE_IP:
927 		ip = (struct ip *)(mp->m_data + ehdrlen);
928 		if (ip->ip_p != IPPROTO_TCP)
929 			return (ENXIO);
930 		ip->ip_sum = 0;
931 		ip_hlen = ip->ip_hl << 2;
932 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
933 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
934 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
935 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
936 		/* Tell transmit desc to also do IPv4 checksum. */
937 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
938 		break;
939 #endif
940 	default:
941 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
942 		    __func__, ntohs(eh_type));
943 		break;
944 	}
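	/*
	 * In the cases above th_sum is seeded with the pseudo-header
	 * checksum (computed with a zero length); the hardware then
	 * inserts the final TCP checksum into each segment it generates.
	 */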
945 
946 	ctxd = txr->next_avail_desc;
947 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
948 
949 	tcp_hlen = th->th_off << 2;
950 
951 	/* This is used in the transmit desc in encap */
952 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
953 
954 	/* VLAN MACLEN IPLEN */
955 	if (mp->m_flags & M_VLANTAG) {
956 		vtag = htole16(mp->m_pkthdr.ether_vtag);
957                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
958 	}
959 
960 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
961 	vlan_macip_lens |= ip_hlen;
962 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
963 
964 	/* ADV DTYPE TUCMD */
965 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
967 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
968 
969 	/* MSS L4LEN IDX */
970 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
971 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
972 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
973 
974 	TXD->seqnum_seed = htole32(0);
975 
976 	if (++ctxd == txr->num_desc)
977 		ctxd = 0;
978 
979 	txr->tx_avail--;
980 	txr->next_avail_desc = ctxd;
981 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
982 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
983 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
984 	++txr->tso_tx;
985 	return (0);
986 }
987 
988 
989 /**********************************************************************
990  *
991  *  Examine each tx_buffer in the used queue. If the hardware is done
992  *  processing the packet then free associated resources. The
993  *  tx_buffer is put back on the free queue.
994  *
995  **********************************************************************/
996 void
997 ixgbe_txeof(struct tx_ring *txr)
998 {
999 	struct adapter		*adapter = txr->adapter;
1000 #ifdef DEV_NETMAP
1001 	struct ifnet		*ifp = adapter->ifp;
1002 #endif
1003 	u32			work, processed = 0;
1004 	u32			limit = adapter->tx_process_limit;
1005 	struct ixgbe_tx_buf	*buf;
1006 	union ixgbe_adv_tx_desc *txd;
1007 
1008 	mtx_assert(&txr->tx_mtx, MA_OWNED);
1009 
1010 #ifdef DEV_NETMAP
1011 	if (ifp->if_capenable & IFCAP_NETMAP) {
1012 		struct netmap_adapter *na = NA(ifp);
1013 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1014 		txd = txr->tx_base;
1015 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1016 		    BUS_DMASYNC_POSTREAD);
1017 		/*
1018 		 * In netmap mode, all the work is done in the context
1019 		 * of the client thread. Interrupt handlers only wake up
1020 		 * clients, which may be sleeping on individual rings
1021 		 * or on a global resource for all rings.
1022 		 * To implement tx interrupt mitigation, we wake up the client
1023 		 * thread roughly every half ring, even if the NIC interrupts
1024 		 * more frequently. This is implemented as follows:
1025 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1026 		 *   the slot that should wake up the thread (nkr_num_slots
1027 		 *   means the user thread should not be woken up);
1028 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1029 		 *   or the slot has the DD bit set.
1030 		 */
1031 		if (!netmap_mitigate ||
1032 		    (kring->nr_kflags < kring->nkr_num_slots &&
1033 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1034 			netmap_tx_irq(ifp, txr->me);
1035 		}
1036 		return;
1037 	}
1038 #endif /* DEV_NETMAP */
1039 
1040 	if (txr->tx_avail == txr->num_desc) {
1041 		txr->busy = 0;
1042 		return;
1043 	}
1044 
1045 	/* Get work starting point */
1046 	work = txr->next_to_clean;
1047 	buf = &txr->tx_buffers[work];
1048 	txd = &txr->tx_base[work];
1049 	work -= txr->num_desc; /* The distance to ring end */
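	/*
	 * 'work' is biased negative here so it reaches zero exactly at the
	 * ring wrap point; the "!work" tests below detect the wrap without
	 * a modulo, and adding num_desc back at the end recovers the new
	 * next_to_clean index.
	 */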
1050         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1051             BUS_DMASYNC_POSTREAD);
1052 
1053 	do {
1054 		union ixgbe_adv_tx_desc *eop = buf->eop;
1055 		if (eop == NULL) /* No work */
1056 			break;
1057 
1058 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1059 			break;	/* I/O not complete */
1060 
1061 		if (buf->m_head) {
1062 			txr->bytes +=
1063 			    buf->m_head->m_pkthdr.len;
1064 			bus_dmamap_sync(txr->txtag,
1065 			    buf->map,
1066 			    BUS_DMASYNC_POSTWRITE);
1067 			bus_dmamap_unload(txr->txtag,
1068 			    buf->map);
1069 			m_freem(buf->m_head);
1070 			buf->m_head = NULL;
1071 		}
1072 		buf->eop = NULL;
1073 		++txr->tx_avail;
1074 
1075 		/* We clean the range if multi segment */
1076 		while (txd != eop) {
1077 			++txd;
1078 			++buf;
1079 			++work;
1080 			/* wrap the ring? */
1081 			if (__predict_false(!work)) {
1082 				work -= txr->num_desc;
1083 				buf = txr->tx_buffers;
1084 				txd = txr->tx_base;
1085 			}
1086 			if (buf->m_head) {
1087 				txr->bytes +=
1088 				    buf->m_head->m_pkthdr.len;
1089 				bus_dmamap_sync(txr->txtag,
1090 				    buf->map,
1091 				    BUS_DMASYNC_POSTWRITE);
1092 				bus_dmamap_unload(txr->txtag,
1093 				    buf->map);
1094 				m_freem(buf->m_head);
1095 				buf->m_head = NULL;
1096 			}
1097 			++txr->tx_avail;
1098 			buf->eop = NULL;
1099 
1100 		}
1101 		++txr->packets;
1102 		++processed;
1103 
1104 		/* Try the next packet */
1105 		++txd;
1106 		++buf;
1107 		++work;
1108 		/* reset with a wrap */
1109 		if (__predict_false(!work)) {
1110 			work -= txr->num_desc;
1111 			buf = txr->tx_buffers;
1112 			txd = txr->tx_base;
1113 		}
1114 		prefetch(txd);
1115 	} while (__predict_true(--limit));
1116 
1117 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1118 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1119 
1120 	work += txr->num_desc;
1121 	txr->next_to_clean = work;
1122 
1123 	/*
1124 	** Queue hang detection: we know there is work
1125 	** outstanding or the early return above would
1126 	** have been taken, so increment busy if nothing
1127 	** managed to get cleaned; local_timer then checks
1128 	** this count and marks the queue HUNG once it
1129 	** exceeds the maximum number of attempts.
1130 	*/
1131 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1132 		++txr->busy;
1133 	/*
1134 	** If anything was cleaned we reset the state to 1;
1135 	** note this clears HUNG if it was set.
1136 	*/
1137 	if (processed)
1138 		txr->busy = 1;
1139 
1140 	if (txr->tx_avail == txr->num_desc)
1141 		txr->busy = 0;
1142 
1143 	return;
1144 }
1145 
1146 
1147 #ifdef IXGBE_FDIR
1148 /*
1149 ** This routine parses packet headers so that Flow
1150 ** Director can make a hashed filter table entry
1151 ** allowing traffic flows to be identified and kept
1152 ** on the same cpu.  Doing this for every packet
1153 ** would be a performance hit, so we only do it
1154 ** for one in every IXGBE_FDIR_RATE packets.
1155 */
1156 static void
1157 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1158 {
1159 	struct adapter			*adapter = txr->adapter;
1160 	struct ix_queue			*que;
1161 	struct ip			*ip;
1162 	struct tcphdr			*th;
1163 	struct udphdr			*uh;
1164 	struct ether_vlan_header	*eh;
1165 	union ixgbe_atr_hash_dword	input = {.dword = 0};
1166 	union ixgbe_atr_hash_dword	common = {.dword = 0};
1167 	int  				ehdrlen, ip_hlen;
1168 	u16				etype;
1169 
1170 	eh = mtod(mp, struct ether_vlan_header *);
1171 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1172 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1173 		etype = eh->evl_proto;
1174 	} else {
1175 		ehdrlen = ETHER_HDR_LEN;
1176 		etype = eh->evl_encap_proto;
1177 	}
1178 
1179 	/* Only handling IPv4 */
1180 	if (etype != htons(ETHERTYPE_IP))
1181 		return;
1182 
1183 	ip = (struct ip *)(mp->m_data + ehdrlen);
1184 	ip_hlen = ip->ip_hl << 2;
1185 
1186 	/* check if we're UDP or TCP */
1187 	switch (ip->ip_p) {
1188 	case IPPROTO_TCP:
1189 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1190 		/* src and dst are inverted */
1191 		common.port.dst ^= th->th_sport;
1192 		common.port.src ^= th->th_dport;
1193 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1194 		break;
1195 	case IPPROTO_UDP:
1196 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1197 		/* src and dst are inverted */
1198 		common.port.dst ^= uh->uh_sport;
1199 		common.port.src ^= uh->uh_dport;
1200 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1201 		break;
1202 	default:
1203 		return;
1204 	}
1205 
1206 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1207 	if (mp->m_pkthdr.ether_vtag)
1208 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1209 	else
1210 		common.flex_bytes ^= etype;
1211 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1212 
1213 	que = &adapter->queues[txr->me];
1214 	/*
1215 	** This assumes the Rx queue and Tx
1216 	** queue are bound to the same CPU
1217 	*/
1218 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1219 	    input, common, que->msix);
1220 }
1221 #endif /* IXGBE_FDIR */
1222 
1223 /*
1224 ** Used to detect a descriptor that has
1225 ** been merged by Hardware RSC.
1226 */
1227 static inline u32
1228 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1229 {
1230 	return (le32toh(rx->wb.lower.lo_dword.data) &
1231 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1232 }
1233 
1234 /*********************************************************************
1235  *
1236  *  Initialize the Hardware RSC (LRO) feature on 82599
1237  *  for an RX ring; this is toggled by the LRO capability
1238  *  even though it is transparent to the stack.
1239  *
1240  *  NOTE: since this HW feature only works with IPv4, and
1241  *        our testing has shown soft LRO to be as effective,
1242  *        it is disabled by default.
1243  *
1244  **********************************************************************/
1245 static void
1246 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1247 {
1248 	struct	adapter 	*adapter = rxr->adapter;
1249 	struct	ixgbe_hw	*hw = &adapter->hw;
1250 	u32			rscctrl, rdrxctl;
1251 
1252 	/* If turning LRO/RSC off we need to disable it */
1253 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1254 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1255 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back; otherwise the read-modify above has no effect */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1256 		return;
1257 	}
1258 
1259 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1260 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1261 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1262 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1263 #endif /* DEV_NETMAP */
1264 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1265 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1266 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1267 
1268 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1269 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1270 	/*
1271 	** Limit the total number of descriptors that
1272 	** can be combined, so it does not exceed 64K
1273 	*/
1274 	if (rxr->mbuf_sz == MCLBYTES)
1275 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1276 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1277 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1278 	else if (rxr->mbuf_sz == MJUM9BYTES)
1279 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1280 	else  /* Using 16K cluster */
1281 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1282 
1283 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1284 
1285 	/* Enable TCP header recognition */
1286 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1287 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1288 	    IXGBE_PSRTYPE_TCPHDR));
1289 
1290 	/* Disable RSC for ACK packets */
1291 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1292 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1293 
1294 	rxr->hw_rsc = TRUE;
1295 }
1296 
1297 /*********************************************************************
1298  *
1299  *  Refresh mbuf buffers for RX descriptor rings
1300  *   - now keeps its own state so discards due to resource
1301  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1302  *     it just returns, keeping its placeholder, so it can simply
1303  *     be called again later to retry.
1304  *
1305  **********************************************************************/
1306 static void
1307 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1308 {
1309 	struct adapter		*adapter = rxr->adapter;
1310 	bus_dma_segment_t	seg[1];
1311 	struct ixgbe_rx_buf	*rxbuf;
1312 	struct mbuf		*mp;
1313 	int			i, j, nsegs, error;
1314 	bool			refreshed = FALSE;
1315 
1316 	i = j = rxr->next_to_refresh;
1317 	/* Control the loop with one beyond */
1318 	if (++j == rxr->num_desc)
1319 		j = 0;
1320 
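	/*
	 * 'j' always runs one slot ahead of 'i', so the loop stops with
	 * next_to_refresh one descriptor short of 'limit'; the tail write
	 * at 'update' therefore never catches up to the slot the caller
	 * is still processing.
	 */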
1321 	while (j != limit) {
1322 		rxbuf = &rxr->rx_buffers[i];
1323 		if (rxbuf->buf == NULL) {
1324 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1325 			    M_PKTHDR, rxr->mbuf_sz);
1326 			if (mp == NULL)
1327 				goto update;
1328 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1329 				m_adj(mp, ETHER_ALIGN);
1330 		} else
1331 			mp = rxbuf->buf;
1332 
1333 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1334 
1335 		/* If we're dealing with an mbuf that was copied rather
1336 		 * than replaced, there's no need to go through busdma.
1337 		 */
1338 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1339 			/* Get the memory mapping */
1340 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1341 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1342 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1343 			if (error != 0) {
1344 				printf("Refresh mbufs: payload dmamap load"
1345 				    " failure - %d\n", error);
1346 				m_free(mp);
1347 				rxbuf->buf = NULL;
1348 				goto update;
1349 			}
1350 			rxbuf->buf = mp;
1351 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1352 			    BUS_DMASYNC_PREREAD);
1353 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1354 			    htole64(seg[0].ds_addr);
1355 		} else {
1356 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1357 			rxbuf->flags &= ~IXGBE_RX_COPY;
1358 		}
1359 
1360 		refreshed = TRUE;
1361 		/* Next is precalculated */
1362 		i = j;
1363 		rxr->next_to_refresh = i;
1364 		if (++j == rxr->num_desc)
1365 			j = 0;
1366 	}
1367 update:
1368 	if (refreshed) /* Update hardware tail index */
1369 		IXGBE_WRITE_REG(&adapter->hw,
1370 		    rxr->tail, rxr->next_to_refresh);
1371 	return;
1372 }
1373 
1374 /*********************************************************************
1375  *
1376  *  Allocate memory for rx_buffer structures. Since we use one
1377  *  rx_buffer per received packet, the maximum number of rx_buffers
1378  *  that we'll need is equal to the number of receive descriptors
1379  *  that we've allocated.
1380  *
1381  **********************************************************************/
1382 int
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1384 {
1385 	struct	adapter 	*adapter = rxr->adapter;
1386 	device_t 		dev = adapter->dev;
1387 	struct ixgbe_rx_buf 	*rxbuf;
1388 	int             	bsize, error;
1389 
1390 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391 	if (!(rxr->rx_buffers =
1392 	    (struct ixgbe_rx_buf *) malloc(bsize,
1393 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1395 		error = ENOMEM;
1396 		goto fail;
1397 	}
1398 
1399 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1400 				   1, 0,	/* alignment, bounds */
1401 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1402 				   BUS_SPACE_MAXADDR,	/* highaddr */
1403 				   NULL, NULL,		/* filter, filterarg */
1404 				   MJUM16BYTES,		/* maxsize */
1405 				   1,			/* nsegments */
1406 				   MJUM16BYTES,		/* maxsegsize */
1407 				   0,			/* flags */
1408 				   NULL,		/* lockfunc */
1409 				   NULL,		/* lockfuncarg */
1410 				   &rxr->ptag))) {
1411 		device_printf(dev, "Unable to create RX DMA tag\n");
1412 		goto fail;
1413 	}
1414 
1415 	for (int i = 0; i < rxr->num_desc; i++) {
1416 		rxbuf = &rxr->rx_buffers[i];
1417 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1418 		if (error) {
1419 			device_printf(dev, "Unable to create RX dma map\n");
1420 			goto fail;
1421 		}
1422 	}
1423 
1424 	return (0);
1425 
1426 fail:
1427 	/* Frees all, but can handle partial completion */
1428 	ixgbe_free_receive_structures(adapter);
1429 	return (error);
1430 }
1431 
1432 static void
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1434 {
1435 	struct ixgbe_rx_buf       *rxbuf;
1436 
1437 	for (int i = 0; i < rxr->num_desc; i++) {
1438 		rxbuf = &rxr->rx_buffers[i];
1439 		if (rxbuf->buf != NULL) {
1440 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1441 			    BUS_DMASYNC_POSTREAD);
1442 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1443 			rxbuf->buf->m_flags |= M_PKTHDR;
1444 			m_freem(rxbuf->buf);
1445 			rxbuf->buf = NULL;
1446 			rxbuf->flags = 0;
1447 		}
1448 	}
1449 }
1450 
1451 /*********************************************************************
1452  *
1453  *  Initialize a receive ring and its buffers.
1454  *
1455  **********************************************************************/
1456 static int
1457 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1458 {
1459 	struct	adapter 	*adapter;
1460 	struct ifnet		*ifp;
1461 	device_t		dev;
1462 	struct ixgbe_rx_buf	*rxbuf;
1463 	bus_dma_segment_t	seg[1];
1464 	struct lro_ctrl		*lro = &rxr->lro;
1465 	int			rsize, nsegs, error = 0;
1466 #ifdef DEV_NETMAP
1467 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1468 	struct netmap_slot *slot;
1469 #endif /* DEV_NETMAP */
1470 
1471 	adapter = rxr->adapter;
1472 	ifp = adapter->ifp;
1473 	dev = adapter->dev;
1474 
1475 	/* Clear the ring contents */
1476 	IXGBE_RX_LOCK(rxr);
1477 #ifdef DEV_NETMAP
1478 	/* same as in ixgbe_setup_transmit_ring() */
1479 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1480 #endif /* DEV_NETMAP */
1481 	rsize = roundup2(adapter->num_rx_desc *
1482 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1483 	bzero((void *)rxr->rx_base, rsize);
1484 	/* Cache the size */
1485 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1486 
1487 	/* Free current RX buffer structs and their mbufs */
1488 	ixgbe_free_receive_ring(rxr);
1489 
1490 	/* Now replenish the mbufs */
1491 	for (int j = 0; j != rxr->num_desc; ++j) {
1492 		struct mbuf	*mp;
1493 
1494 		rxbuf = &rxr->rx_buffers[j];
1495 #ifdef DEV_NETMAP
1496 		/*
1497 		 * In netmap mode, fill the map and set the buffer
1498 		 * address in the NIC ring, considering the offset
1499 		 * between the netmap and NIC rings (see comment in
1500 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1501 		 * an mbuf, so end the block with a continue;
1502 		 */
1503 		if (slot) {
1504 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1505 			uint64_t paddr;
1506 			void *addr;
1507 
1508 			addr = PNMB(na, slot + sj, &paddr);
1509 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1510 			/* Update descriptor and the cached value */
1511 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1512 			rxbuf->addr = htole64(paddr);
1513 			continue;
1514 		}
1515 #endif /* DEV_NETMAP */
1516 		rxbuf->flags = 0;
1517 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1518 		    M_PKTHDR, adapter->rx_mbuf_sz);
1519 		if (rxbuf->buf == NULL) {
1520 			error = ENOBUFS;
1521                         goto fail;
1522 		}
1523 		mp = rxbuf->buf;
1524 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1525 		/* Get the memory mapping */
1526 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1527 		    rxbuf->pmap, mp, seg,
1528 		    &nsegs, BUS_DMA_NOWAIT);
1529 		if (error != 0)
1530                         goto fail;
1531 		bus_dmamap_sync(rxr->ptag,
1532 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1533 		/* Update the descriptor and the cached value */
1534 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1535 		rxbuf->addr = htole64(seg[0].ds_addr);
1536 	}
1537 
1538 
1539 	/* Setup our descriptor indices */
1540 	rxr->next_to_check = 0;
1541 	rxr->next_to_refresh = 0;
1542 	rxr->lro_enabled = FALSE;
1543 	rxr->rx_copies = 0;
1544 	rxr->rx_bytes = 0;
1545 	rxr->vtag_strip = FALSE;
1546 
1547 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1548 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1549 
1550 	/*
1551 	** Now set up the LRO interface:
1552 	*/
1553 	if (ixgbe_rsc_enable)
1554 		ixgbe_setup_hw_rsc(rxr);
1555 	else if (ifp->if_capenable & IFCAP_LRO) {
1556 		int err = tcp_lro_init(lro);
1557 		if (err) {
1558 			device_printf(dev, "LRO Initialization failed!\n");
1559 			goto fail;
1560 		}
1561 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1562 		rxr->lro_enabled = TRUE;
1563 		lro->ifp = adapter->ifp;
1564 	}
1565 
1566 	IXGBE_RX_UNLOCK(rxr);
1567 	return (0);
1568 
1569 fail:
1570 	ixgbe_free_receive_ring(rxr);
1571 	IXGBE_RX_UNLOCK(rxr);
1572 	return (error);
1573 }
1574 
1575 /*********************************************************************
1576  *
1577  *  Initialize all receive rings.
1578  *
1579  **********************************************************************/
1580 int
1581 ixgbe_setup_receive_structures(struct adapter *adapter)
1582 {
1583 	struct rx_ring *rxr = adapter->rx_rings;
1584 	int j;
1585 
1586 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1587 		if (ixgbe_setup_receive_ring(rxr))
1588 			goto fail;
1589 
1590 	return (0);
1591 fail:
1592 	/*
1593 	 * Free RX buffers allocated so far; we only handle the
1594 	 * rings that completed, since the failing case will have
1595 	 * cleaned up after itself. 'j' failed, so it is the terminus.
1596 	 */
1597 	for (int i = 0; i < j; ++i) {
1598 		rxr = &adapter->rx_rings[i];
1599 		ixgbe_free_receive_ring(rxr);
1600 	}
1601 
1602 	return (ENOBUFS);
1603 }
1604 
1605 
1606 /*********************************************************************
1607  *
1608  *  Free all receive rings.
1609  *
1610  **********************************************************************/
1611 void
1612 ixgbe_free_receive_structures(struct adapter *adapter)
1613 {
1614 	struct rx_ring *rxr = adapter->rx_rings;
1615 
1616 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1617 
1618 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1619 		struct lro_ctrl		*lro = &rxr->lro;
1620 		ixgbe_free_receive_buffers(rxr);
1621 		/* Free LRO memory */
1622 		tcp_lro_free(lro);
1623 		/* Free the ring memory as well */
1624 		ixgbe_dma_free(adapter, &rxr->rxdma);
1625 	}
1626 
1627 	free(adapter->rx_rings, M_DEVBUF);
1628 }
1629 
1630 
1631 /*********************************************************************
1632  *
1633  *  Free receive ring data structures
1634  *
1635  **********************************************************************/
1636 void
1637 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1638 {
1639 	struct adapter		*adapter = rxr->adapter;
1640 	struct ixgbe_rx_buf	*rxbuf;
1641 
1642 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1643 
1644 	/* Cleanup any existing buffers */
1645 	if (rxr->rx_buffers != NULL) {
1646 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1647 			rxbuf = &rxr->rx_buffers[i];
1648 			if (rxbuf->buf != NULL) {
1649 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1650 				    BUS_DMASYNC_POSTREAD);
1651 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1652 				rxbuf->buf->m_flags |= M_PKTHDR;
1653 				m_freem(rxbuf->buf);
1654 			}
1655 			rxbuf->buf = NULL;
1656 			if (rxbuf->pmap != NULL) {
1657 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1658 				rxbuf->pmap = NULL;
1659 			}
1660 		}
1661 		if (rxr->rx_buffers != NULL) {
1662 			free(rxr->rx_buffers, M_DEVBUF);
1663 			rxr->rx_buffers = NULL;
1664 		}
1665 	}
1666 
1667 	if (rxr->ptag != NULL) {
1668 		bus_dma_tag_destroy(rxr->ptag);
1669 		rxr->ptag = NULL;
1670 	}
1671 
1672 	return;
1673 }
1674 
1675 static __inline void
1676 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1677 {
1678 
1679         /*
1680          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1681          * has been computed by hardware, and the ethernet header must not
1682          * carry a VLAN tag.  For IPv6 we do not yet support ext. hdrs.
1683          */
1684         if (rxr->lro_enabled &&
1685             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1686             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1687             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1688             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1689             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1690             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1691             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1692             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1693                 /*
1694                  * Send to the stack if:
1695                  **  - LRO not enabled, or
1696                  **  - no LRO resources, or
1697                  **  - lro enqueue fails
1698                  */
1699                 if (rxr->lro.lro_cnt != 0)
1700                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1701                                 return;
1702         }
1703 	IXGBE_RX_UNLOCK(rxr);
1704         (*ifp->if_input)(ifp, m);
1705 	IXGBE_RX_LOCK(rxr);
1706 }
1707 
1708 static __inline void
1709 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1710 {
1711 	struct ixgbe_rx_buf	*rbuf;
1712 
1713 	rbuf = &rxr->rx_buffers[i];
1714 
1715 
1716 	/*
1717 	** With advanced descriptors the writeback
1718 	** clobbers the buffer addrs, so it is easier
1719 	** to just free the existing mbufs and take
1720 	** the normal refresh path to get new buffers
1721 	** and mapping.
1722 	*/
1723 
1724 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1725 		rbuf->fmp->m_flags |= M_PKTHDR;
1726 		m_freem(rbuf->fmp);
1727 		rbuf->fmp = NULL;
1728 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1729 	} else if (rbuf->buf) {
1730 		m_free(rbuf->buf);
1731 		rbuf->buf = NULL;
1732 	}
1733 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1734 
1735 	rbuf->flags = 0;
1736 
1737 	return;
1738 }
1739 
1740 
1741 /*********************************************************************
1742  *
1743  *  This routine executes in interrupt context. It replenishes
1744  *  the mbufs in the descriptor ring and sends data which has been
1745  *  dma'ed into host memory to the upper layer.
1746  *
1747  *  Return TRUE for more work, FALSE for all clean.
1748  *********************************************************************/
1749 bool
1750 ixgbe_rxeof(struct ix_queue *que)
1751 {
1752 	struct adapter		*adapter = que->adapter;
1753 	struct rx_ring		*rxr = que->rxr;
1754 	struct ifnet		*ifp = adapter->ifp;
1755 	struct lro_ctrl		*lro = &rxr->lro;
1756 	struct lro_entry	*queued;
1757 	int			i, nextp, processed = 0;
1758 	u32			staterr = 0;
1759 	u32			count = adapter->rx_process_limit;
1760 	union ixgbe_adv_rx_desc	*cur;
1761 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1762 	u16			pkt_info;
1763 
1764 	IXGBE_RX_LOCK(rxr);
1765 
1766 #ifdef DEV_NETMAP
1767 	/* Same as the txeof routine: wakeup clients on intr. */
1768 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1769 		IXGBE_RX_UNLOCK(rxr);
1770 		return (FALSE);
1771 	}
1772 #endif /* DEV_NETMAP */
1773 
1774 	for (i = rxr->next_to_check; count != 0;) {
1775 		struct mbuf	*sendmp, *mp;
1776 		u32		rsc, ptype;
1777 		u16		len;
1778 		u16		vtag = 0;
1779 		bool		eop;
1780 
1781 		/* Sync the ring. */
1782 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1783 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1784 
1785 		cur = &rxr->rx_base[i];
1786 		staterr = le32toh(cur->wb.upper.status_error);
1787 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1788 
1789 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1790 			break;
1791 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1792 			break;
1793 
1794 		count--;
1795 		sendmp = NULL;
1796 		nbuf = NULL;
1797 		rsc = 0;
1798 		cur->wb.upper.status_error = 0;
1799 		rbuf = &rxr->rx_buffers[i];
1800 		mp = rbuf->buf;
1801 
1802 		len = le16toh(cur->wb.upper.length);
1803 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1804 		    IXGBE_RXDADV_PKTTYPE_MASK;
1805 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1806 
1807 		/* Make sure bad packets are discarded */
1808 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1809 #if __FreeBSD_version >= 1100036
1810 			if (IXGBE_IS_VF(adapter))
1811 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1812 #endif
1813 			rxr->rx_discarded++;
1814 			ixgbe_rx_discard(rxr, i);
1815 			goto next_desc;
1816 		}
1817 
1818 		/*
1819 		** On the 82599, which supports a hardware
1820 		** LRO (called HW RSC), packets need not be
1821 		** fragmented across sequential descriptors;
1822 		** instead the next descriptor is indicated
1823 		** in bits of the current descriptor. This
1824 		** also means we might process more than one
1825 		** packet at a time, something that was never
1826 		** true before and required eliminating the
1827 		** global chain pointers in favor of what we
1828 		** are doing here.  -jfv
1829 		*/
1830 		if (!eop) {
1831 			/*
1832 			** Figure out the next descriptor
1833 			** of this frame.
1834 			*/
1835 			if (rxr->hw_rsc == TRUE) {
1836 				rsc = ixgbe_rsc_count(cur);
1837 				rxr->rsc_num += (rsc - 1);
1838 			}
1839 			if (rsc) { /* Get hardware index */
1840 				nextp = ((staterr &
1841 				    IXGBE_RXDADV_NEXTP_MASK) >>
1842 				    IXGBE_RXDADV_NEXTP_SHIFT);
1843 			} else { /* Just sequential */
1844 				nextp = i + 1;
1845 				if (nextp == adapter->num_rx_desc)
1846 					nextp = 0;
1847 			}
1848 			nbuf = &rxr->rx_buffers[nextp];
1849 			prefetch(nbuf);
1850 		}
1851 		/*
1852 		** Rather than using the fmp/lmp global pointers
1853 		** we now keep the head of a packet chain in the
1854 		** buffer struct and pass this along from one
1855 		** descriptor to the next, until we get EOP.
1856 		*/
1857 		mp->m_len = len;
1858 		/*
1859 		** See if there is a stored head; this tells us
1860 		** whether we are continuing an existing chain.
1861 		*/
1862 		sendmp = rbuf->fmp;
1863 		if (sendmp != NULL) {  /* secondary frag */
1864 			rbuf->buf = rbuf->fmp = NULL;
1865 			mp->m_flags &= ~M_PKTHDR;
1866 			sendmp->m_pkthdr.len += mp->m_len;
1867 		} else {
1868 			/*
1869 			 * Optimize.  This might be a small packet,
1870 			 * maybe just a TCP ACK.  Do a fast, cache-
1871 			 * aligned copy into a new mbuf and leave
1872 			 * the old mbuf+cluster for re-use.
1873 			 */
1874 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1875 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1876 				if (sendmp != NULL) {
1877 					sendmp->m_data +=
1878 					    IXGBE_RX_COPY_ALIGN;
1879 					ixgbe_bcopy(mp->m_data,
1880 					    sendmp->m_data, len);
1881 					sendmp->m_len = len;
1882 					rxr->rx_copies++;
1883 					rbuf->flags |= IXGBE_RX_COPY;
1884 				}
1885 			}
1886 			if (sendmp == NULL) {
1887 				rbuf->buf = rbuf->fmp = NULL;
1888 				sendmp = mp;
1889 			}
1890 
1891 		/* First descriptor of a non-packet-split chain */
1892 			sendmp->m_flags |= M_PKTHDR;
1893 			sendmp->m_pkthdr.len = mp->m_len;
1894 		}
1895 		++processed;
1896 
1897 		/* Pass the head pointer on */
1898 		if (eop == 0) {
1899 			nbuf->fmp = sendmp;
1900 			sendmp = NULL;
1901 			mp->m_next = nbuf->buf;
1902 		} else { /* Sending this frame */
1903 			sendmp->m_pkthdr.rcvif = ifp;
1904 			rxr->rx_packets++;
1905 			/* capture data for AIM */
1906 			rxr->bytes += sendmp->m_pkthdr.len;
1907 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1908 			/* Process vlan info */
1909 			if ((rxr->vtag_strip) &&
1910 			    (staterr & IXGBE_RXD_STAT_VP))
1911 				vtag = le16toh(cur->wb.upper.vlan);
1912 			if (vtag) {
1913 				sendmp->m_pkthdr.ether_vtag = vtag;
1914 				sendmp->m_flags |= M_VLANTAG;
1915 			}
1916 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1917 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1918 
1919                         /*
1920                          * In the multiqueue case, the RXCSUM.PCSD bit is
1921                          * set and never cleared, which means the RSS hash
1922                          * is available for use.
1923                          */
1924                         if (adapter->num_queues > 1) {
1925                                 sendmp->m_pkthdr.flowid =
1926                                     le32toh(cur->wb.lower.hi_dword.rss);
1927                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1928                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1929                                         M_HASHTYPE_SET(sendmp,
1930                                             M_HASHTYPE_RSS_IPV4);
1931                                         break;
1932                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1933                                         M_HASHTYPE_SET(sendmp,
1934                                             M_HASHTYPE_RSS_TCP_IPV4);
1935                                         break;
1936                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1937                                         M_HASHTYPE_SET(sendmp,
1938                                             M_HASHTYPE_RSS_IPV6);
1939                                         break;
1940                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1941                                         M_HASHTYPE_SET(sendmp,
1942                                             M_HASHTYPE_RSS_TCP_IPV6);
1943                                         break;
1944                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1945                                         M_HASHTYPE_SET(sendmp,
1946                                             M_HASHTYPE_RSS_IPV6_EX);
1947                                         break;
1948                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1949                                         M_HASHTYPE_SET(sendmp,
1950                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1951                                         break;
1952 #if __FreeBSD_version > 1100000
1953                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1954                                         M_HASHTYPE_SET(sendmp,
1955                                             M_HASHTYPE_RSS_UDP_IPV4);
1956                                         break;
1957                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1958                                         M_HASHTYPE_SET(sendmp,
1959                                             M_HASHTYPE_RSS_UDP_IPV6);
1960                                         break;
1961                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1962                                         M_HASHTYPE_SET(sendmp,
1963                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1964                                         break;
1965 #endif
1966                                     default:
1967                                         M_HASHTYPE_SET(sendmp,
1968                                             M_HASHTYPE_OPAQUE);
1969                                 }
1970                         } else {
1971                                 sendmp->m_pkthdr.flowid = que->msix;
1972 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1973 			}
1974 		}
1975 next_desc:
1976 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1977 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1978 
1979 		/* Advance our pointers to the next descriptor. */
1980 		if (++i == rxr->num_desc)
1981 			i = 0;
1982 
1983 		/* Now send to the stack or do LRO */
1984 		if (sendmp != NULL) {
1985 			rxr->next_to_check = i;
1986 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1987 			i = rxr->next_to_check;
1988 		}
1989 
1990 		/* Refresh mbufs every eight descriptors */
1991 		if (processed == 8) {
1992 			ixgbe_refresh_mbufs(rxr, i);
1993 			processed = 0;
1994 		}
1995 	}
1996 
1997 	/* Refresh any remaining buf structs */
1998 	if (ixgbe_rx_unrefreshed(rxr))
1999 		ixgbe_refresh_mbufs(rxr, i);
2000 
2001 	rxr->next_to_check = i;
2002 
2003 	/*
2004 	 * Flush any outstanding LRO work
2005 	 */
2006 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2007 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
2008 		tcp_lro_flush(lro, queued);
2009 	}
2010 
2011 	IXGBE_RX_UNLOCK(rxr);
2012 
2013 	/*
2014 	** Still have cleaning to do?
2015 	*/
2016 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2017 		return (TRUE);
2018 	else
2019 		return (FALSE);
2020 }
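
/*
 * Illustrative sketch (not driver code): the TRUE/FALSE return above is
 * typically consumed by the MSI-X queue handler, which either defers more
 * cleanup to a taskqueue or re-enables the queue interrupt.  The field and
 * helper names (que->tq, que->que_task, ixgbe_enable_queue()) are assumed
 * from the companion if_ix.c and may differ:
 *
 *	bool more = ixgbe_rxeof(que);
 *
 *	if (more)
 *		taskqueue_enqueue(que->tq, &que->que_task);
 *	else
 *		ixgbe_enable_queue(que->adapter, que->msix);
 */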
2021 
2022 
2023 /*********************************************************************
2024  *
2025  *  Verify that the hardware indicated that the checksum is valid.
2026  *  Inform the stack of the checksum status so that the stack
2027  *  does not spend time verifying it again.
2028  *
2029  *********************************************************************/
2030 static void
2031 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2032 {
2033 	u16	status = (u16) staterr;
2034 	u8	errors = (u8) (staterr >> 24);
2035 	bool	sctp = false;
2036 
2037 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2038 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2039 		sctp = true;
2040 
2041 	/* IPv4 checksum */
2042 	if (status & IXGBE_RXD_STAT_IPCS) {
2043 		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2044 		/* IP Checksum Good */
2045 		if (!(errors & IXGBE_RXD_ERR_IPE))
2046 			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2047 	}
2048 	/* TCP/UDP/SCTP checksum */
2049 	if (status & IXGBE_RXD_STAT_L4CS) {
2050 		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2051 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2052 			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2053 			if (!sctp)
2054 				mp->m_pkthdr.csum_data = htons(0xffff);
2055 		}
2056 	}
2057 }
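
/*
 * Illustrative sketch (not driver code): a receive-path consumer checks
 * the flags set above before trusting the hardware result; this mirrors
 * the LRO gate in ixgbe_rx_input().  CSUM_L4_CALC and CSUM_L4_VALID are
 * the mbuf.h aliases for CSUM_DATA_VALID and CSUM_PSEUDO_HDR, so the test
 * below matches what this routine sets for a good L4 checksum:
 *
 *	if ((m->m_pkthdr.csum_flags &
 *	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 *	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
 *		hw_verified = true;	   (csum_data then holds 0xffff)
 */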
2058 
2059 /********************************************************************
2060  * Manage DMA'able memory.
2061  *******************************************************************/
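/* bus_dmamap_load() callback: record the bus address of the single segment. */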
2062 static void
2063 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2064 {
2065 	if (error)
2066 		return;
2067 	*(bus_addr_t *) arg = segs->ds_addr;
2068 	return;
2069 }
2070 
2071 int
2072 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2073 		struct ixgbe_dma_alloc *dma, int mapflags)
2074 {
2075 	device_t dev = adapter->dev;
2076 	int             r;
2077 
2078 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2079 			       DBA_ALIGN, 0,	/* alignment, bounds */
2080 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2081 			       BUS_SPACE_MAXADDR,	/* highaddr */
2082 			       NULL, NULL,	/* filter, filterarg */
2083 			       size,	/* maxsize */
2084 			       1,	/* nsegments */
2085 			       size,	/* maxsegsize */
2086 			       BUS_DMA_ALLOCNOW,	/* flags */
2087 			       NULL,	/* lockfunc */
2088 			       NULL,	/* lockfuncarg */
2089 			       &dma->dma_tag);
2090 	if (r != 0) {
2091 		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2092 		       "error %u\n", r);
2093 		goto fail_0;
2094 	}
2095 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2096 			     BUS_DMA_NOWAIT, &dma->dma_map);
2097 	if (r != 0) {
2098 		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2099 		       "error %u\n", r);
2100 		goto fail_1;
2101 	}
2102 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2103 			    size,
2104 			    ixgbe_dmamap_cb,
2105 			    &dma->dma_paddr,
2106 			    mapflags | BUS_DMA_NOWAIT);
2107 	if (r != 0) {
2108 		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2109 		       "error %u\n", r);
2110 		goto fail_2;
2111 	}
2112 	dma->dma_size = size;
2113 	return (0);
2114 fail_2:
2115 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2116 fail_1:
2117 	bus_dma_tag_destroy(dma->dma_tag);
2118 fail_0:
2119 	dma->dma_tag = NULL;
2120 	return (r);
2121 }
2122 
2123 void
2124 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2125 {
2126 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2127 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2128 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2129 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2130 	bus_dma_tag_destroy(dma->dma_tag);
2131 }
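
/*
 * Illustrative sketch (not driver code): a typical pairing of the two
 * helpers above, as used below for the descriptor rings.  'ring' and
 * 'nitems' are hypothetical names used only for this example:
 *
 *	struct ixgbe_dma_alloc ring;
 *	bus_size_t size;
 *
 *	size = roundup2(nitems * sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
 *	if (ixgbe_dma_malloc(adapter, size, &ring, BUS_DMA_NOWAIT) == 0) {
 *		... use ring.dma_vaddr / ring.dma_paddr ...
 *		ixgbe_dma_free(adapter, &ring);
 *	}
 */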
2132 
2133 
2134 /*********************************************************************
2135  *
2136  *  Allocate memory for the transmit and receive rings, and then
2137  *  the descriptors associated with each; called only once at attach.
2138  *
2139  **********************************************************************/
2140 int
2141 ixgbe_allocate_queues(struct adapter *adapter)
2142 {
2143 	device_t	dev = adapter->dev;
2144 	struct ix_queue	*que;
2145 	struct tx_ring	*txr;
2146 	struct rx_ring	*rxr;
2147 	int rsize, tsize, error = IXGBE_SUCCESS;
2148 	int txconf = 0, rxconf = 0;
2149 #ifdef PCI_IOV
2150 	enum ixgbe_iov_mode iov_mode;
2151 #endif
2152 
2153 	/* First allocate the top level queue structs */
2154 	if (!(adapter->queues =
2155 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2156 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2157 		device_printf(dev, "Unable to allocate queue memory\n");
2158 		error = ENOMEM;
2159 		goto fail;
2160 	}
2161 
2162 	/* Next allocate the TX ring struct memory */
2163 	if (!(adapter->tx_rings =
2164 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2165 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2166 		device_printf(dev, "Unable to allocate TX ring memory\n");
2167 		error = ENOMEM;
2168 		goto tx_fail;
2169 	}
2170 
2171 	/* Next allocate the RX */
2172 	if (!(adapter->rx_rings =
2173 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2174 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2175 		device_printf(dev, "Unable to allocate RX ring memory\n");
2176 		error = ENOMEM;
2177 		goto rx_fail;
2178 	}
2179 
2180 	/* For the ring itself */
2181 	tsize = roundup2(adapter->num_tx_desc *
2182 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2183 
2184 #ifdef PCI_IOV
2185 	iov_mode = ixgbe_get_iov_mode(adapter);
2186 	adapter->pool = ixgbe_max_vfs(iov_mode);
2187 #else
2188 	adapter->pool = 0;
2189 #endif
2190 	/*
2191 	 * Now set up the TX queues; txconf is needed to handle the
2192 	 * possibility that things fail midcourse and we need to
2193 	 * unwind the allocated memory gracefully
2194 	 */
2195 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2196 		/* Set up some basics */
2197 		txr = &adapter->tx_rings[i];
2198 		txr->adapter = adapter;
2199 #ifdef PCI_IOV
2200 		txr->me = ixgbe_pf_que_index(iov_mode, i);
2201 #else
2202 		txr->me = i;
2203 #endif
2204 		txr->num_desc = adapter->num_tx_desc;
2205 
2206 		/* Initialize the TX side lock */
2207 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2208 		    device_get_nameunit(dev), txr->me);
2209 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2210 
2211 		if (ixgbe_dma_malloc(adapter, tsize,
2212 			&txr->txdma, BUS_DMA_NOWAIT)) {
2213 			device_printf(dev,
2214 			    "Unable to allocate TX Descriptor memory\n");
2215 			error = ENOMEM;
2216 			goto err_tx_desc;
2217 		}
2218 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2219 		bzero((void *)txr->tx_base, tsize);
2220 
2221 		/* Now allocate transmit buffers for the ring */
2222 		if (ixgbe_allocate_transmit_buffers(txr)) {
2223 			device_printf(dev,
2224 			    "Critical Failure setting up transmit buffers\n");
2225 			error = ENOMEM;
2226 			goto err_tx_desc;
2227 		}
2228 #ifndef IXGBE_LEGACY_TX
2229 		/* Allocate a buf ring */
2230 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2231 		    M_WAITOK, &txr->tx_mtx);
2232 		if (txr->br == NULL) {
2233 			device_printf(dev,
2234 			    "Critical Failure setting up buf ring\n");
2235 			error = ENOMEM;
2236 			goto err_tx_desc;
2237 		}
2238 #endif
2239 	}
2240 
2241 	/*
2242 	 * Next the RX queues...
2243 	 */
2244 	rsize = roundup2(adapter->num_rx_desc *
2245 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2246 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2247 		rxr = &adapter->rx_rings[i];
2248 		/* Set up some basics */
2249 		rxr->adapter = adapter;
2250 #ifdef PCI_IOV
2251 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
2252 #else
2253 		rxr->me = i;
2254 #endif
2255 		rxr->num_desc = adapter->num_rx_desc;
2256 
2257 		/* Initialize the RX side lock */
2258 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2259 		    device_get_nameunit(dev), rxr->me);
2260 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2261 
2262 		if (ixgbe_dma_malloc(adapter, rsize,
2263 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2264 			device_printf(dev,
2265 			    "Unable to allocate RX Descriptor memory\n");
2266 			error = ENOMEM;
2267 			goto err_rx_desc;
2268 		}
2269 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2270 		bzero((void *)rxr->rx_base, rsize);
2271 
2272 		/* Allocate receive buffers for the ring */
2273 		if (ixgbe_allocate_receive_buffers(rxr)) {
2274 			device_printf(dev,
2275 			    "Critical Failure setting up receive buffers\n");
2276 			error = ENOMEM;
2277 			goto err_rx_desc;
2278 		}
2279 	}
2280 
2281 	/*
2282 	** Finally set up the queue holding structs
2283 	*/
2284 	for (int i = 0; i < adapter->num_queues; i++) {
2285 		que = &adapter->queues[i];
2286 		que->adapter = adapter;
2287 		que->me = i;
2288 		que->txr = &adapter->tx_rings[i];
2289 		que->rxr = &adapter->rx_rings[i];
2290 	}
2291 
2292 	return (0);
2293 
2294 err_rx_desc:
2295 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2296 		ixgbe_dma_free(adapter, &rxr->rxdma);
2297 err_tx_desc:
2298 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2299 		ixgbe_dma_free(adapter, &txr->txdma);
2300 	free(adapter->rx_rings, M_DEVBUF);
2301 rx_fail:
2302 	free(adapter->tx_rings, M_DEVBUF);
2303 tx_fail:
2304 	free(adapter->queues, M_DEVBUF);
2305 fail:
2306 	return (error);
2307 }
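
/*
 * Illustrative sketch (not driver code): the inverse teardown of the
 * allocations above, in reverse order, assuming the queues have already
 * been quiesced and the per-ring buffers freed.  The driver's real cleanup
 * is spread across the ixgbe_free_*() routines; this only shows the
 * structures allocated in this function:
 *
 *	for (int i = 0; i < adapter->num_queues; i++) {
 *		txr = &adapter->tx_rings[i];
 *		rxr = &adapter->rx_rings[i];
 *	#ifndef IXGBE_LEGACY_TX
 *		buf_ring_free(txr->br, M_DEVBUF);
 *	#endif
 *		ixgbe_dma_free(adapter, &txr->txdma);
 *		ixgbe_dma_free(adapter, &rxr->rxdma);
 *		mtx_destroy(&txr->tx_mtx);
 *		mtx_destroy(&rxr->rx_mtx);
 *	}
 *	free(adapter->rx_rings, M_DEVBUF);
 *	free(adapter->tx_rings, M_DEVBUF);
 *	free(adapter->queues, M_DEVBUF);
 */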
2308