xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision 3c838a9f51e4d2a7753500d167ba7dbbb9827c82)
/*-
 * Copyright (c) 2010-2015 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/limits.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

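/* Refill the receive queue once its fill level drops below 90% of the
 * queue limit (see sfxge_rx_qcomplete()).
 */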
#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
	    &lro_loss_packets, 0,
	    "Number of packets with payload that must arrive in-order "
	    "following loss before a connection is eligible for LRO");

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
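/* Returns zero iff the two addresses are equal: the OR of the word-wise
 * differences is nonzero whenever any word differs.
 */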
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

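/* Toeplitz hash key used for receive-side scaling; this is the widely
 * used sample key from Microsoft's RSS specification.
 */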
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

static void
sfxge_rx_post_refill(void *arg)
{
	struct sfxge_rxq *rxq = arg;
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	uint16_t magic;

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

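	/* Post a software (magic) event to the event queue; its handler
	 * performs the actual refill in the event queue's context.
	 */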
	magic = SFXGE_MAGIC_RX_QREFILL | index;

	/* This is guaranteed due to the start/stop order of rx and ev */
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq not started"));
	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));
	efx_ev_qpost(evq->common, magic);
}

static void
sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
{
	/* Initially retry after 100 ms, but back off in case of
	 * repeated failures as we probably have to wait for the
	 * administrator to raise the pool limit. */
	if (retrying)
		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
	else
		rxq->refill_delay = hz / 10;

	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
			     sfxge_rx_post_refill, rxq);
}

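/* Allocate an mbuf header and attach a packet buffer taken from the
 * driver's pre-selected UMA zone (chosen in sfxge_rx_start() to fit the
 * receive buffer size).
 */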
static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
	struct mb_args args;
	struct mbuf *m;

	/* Allocate mbuf structure */
	args.flags = M_PKTHDR;
	args.type = MT_DATA;
	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);

	/* Allocate (and attach) packet buffer */
	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
		uma_zfree(zone_mbuf, m);
		m = NULL;
	}

	return (m);
}

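/* Post receive descriptors to the hardware in batches of up to 64 so
 * that the cost of efx_rx_qpost() is amortized over many buffers.
 */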
#define	SFXGE_REFILL_BATCH  64

static void
sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
{
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	prefetch_read_many(sc->enp);
	prefetch_read_many(rxq->common);

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	rxfill = rxq->added - rxq->completed;
	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));

	if (ntodo == 0)
		return;

	batch = 0;
	mblksize = sc->rx_buffer_size - sc->rx_buffer_align;
	while (ntodo-- > 0) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;
		bus_dma_segment_t seg;
		struct mbuf *m;

		id = (rxq->added + batch) & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));

		rx_desc->flags = EFX_DISCARD;
		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
		if (m == NULL)
			break;

		/* m_len specifies length of area to be mapped for DMA */
		m->m_len = mblksize;
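		/* Round the payload start up to a cache-line boundary, then
		 * offset it by rx_buffer_align so that the IP header ends up
		 * 32-bit aligned (see sfxge_rx_start()).
		 */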
		m->m_data = (caddr_t)P2ROUNDUP((uintptr_t)m->m_data, CACHE_LINE_SIZE);
		m->m_data += sc->rx_buffer_align;

		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
		addr[batch++] = seg.ds_addr;

		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(rxq->common, addr, mblksize, batch,
			    rxq->completed, rxq->added);
			rxq->added += batch;
			batch = 0;
		}
	}

	if (ntodo != 0)
		sfxge_rx_schedule_refill(rxq, retrying);

	if (batch != 0) {
		efx_rx_qpost(rxq->common, addr, mblksize, batch,
		    rxq->completed, rxq->added);
		rxq->added += batch;
	}

	/* Make the descriptors visible to the hardware */
	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
			BUS_DMASYNC_PREWRITE);

	efx_rx_qpush(rxq->common, rxq->added, &rxq->pushed);

	/* The queue could still be empty if no descriptors were actually
	 * pushed, in which case there will be no event to cause the next
	 * refill, so we must schedule a refill ourselves.
	 */
	if (rxq->pushed == rxq->completed) {
		sfxge_rx_schedule_refill(rxq, retrying);
	}
}

void
sfxge_rx_qrefill(struct sfxge_rxq *rxq)
{

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	/* Make sure the queue is full */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
}

static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int flags = rx_desc->flags;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (flags & EFX_CKSUM_IPV4) ?
		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

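	/* The controller computed a Toeplitz hash over the flow and placed
	 * it in the packet prefix; use it as the mbuf flow ID so that the
	 * stack can keep packets of one flow on one CPU.
	 */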
	if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		m->m_pkthdr.flowid =
			efx_psuedo_hdr_hash_get(sc->enp,
						EFX_RX_HASHALG_TOEPLITZ,
						mtod(m, uint8_t *));
		/* The hash covers a 4-tuple for TCP only */
		M_HASHTYPE_SET(m,
		    (flags & EFX_PKT_IPV4) ?
			((flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_IPV4) :
			((flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_IPV6));
	}
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m,
	    SFXGE_LRO_CONN_IS_TCPIPV4(c) ?
		M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_TCP_IPV6);

	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
		("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
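	/* 65536 is the maximum IP datagram length; the 9200-byte margin
	 * presumably leaves room for one further jumbo-frame-sized segment.
	 */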
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
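		/* th_off == 8 means 12 bytes of options: exactly the
		 * NOP, NOP, timestamp layout used by most TCP stacks.
		 */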
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = efx_psuedo_hdr_hash_get(sc->enp,
					    EFX_RX_HASHALG_TOEPLITZ,
					    mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;

		KASSERT(iph->ip_p == IPPROTO_TCP,
		    ("IPv4 protocol is not TCP, but packet marker is set"));
		if ((iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;

		KASSERT(iph->ip6_nxt == IPPROTO_TCP,
		    ("IPv6 next header is not TCP, but packet marker is set"));
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
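		/* Branch-free comparisons: the OR of the differences is
		 * nonzero if any of the fields differ.
		 */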
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

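/* Called at the end of an event-queue poll: merge or deliver each active
 * connection's held packet, and periodically purge idle connections.
 */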
static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int if_capenable = sc->ifnet->if_capenable;
	int lro_enabled = if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		/* Read the length from the pseudo header if required */
		if (rx_desc->flags & EFX_PKT_PREFIX_LEN) {
			uint16_t tmp_size;
			int rc;
			rc = efx_psuedo_hdr_pkt_length_get(sc->enp,
							   mtod(m, uint8_t *),
							   &tmp_size);
			KASSERT(rc == 0, ("cannot get packet length: %d", rc));
			rx_desc->size = (int)tmp_size + sc->rx_prefix_size;
		}

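		/* Prefetch the packet data; delivery of the previous packet
		 * (below) overlaps with this prefetch, hence the one-packet
		 * pipeline through 'prev'.
		 */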
		prefetch_read_many(mtod(m, caddr_t));

		switch (rx_desc->flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		case EFX_PKT_IPV4:
			if (~if_capenable & IFCAP_RXCSUM)
				rx_desc->flags &=
				    ~(EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP);
			break;
		case EFX_PKT_IPV6:
			if (~if_capenable & IFCAP_RXCSUM_IPV6)
				rx_desc->flags &= ~EFX_CKSUM_TCPUDP;
			break;
		case 0:
			/* Check for loopback packets */
			{
				struct ether_header *etherhp;

				/*LINTED*/
				etherhp = mtod(m, struct ether_header *);

				if (etherhp->ether_type ==
				    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
					EFSYS_PROBE(loopback);

					rxq->loopback++;
					goto discard;
				}
			}
			break;
		default:
			KASSERT(B_FALSE,
			    ("Rx descriptor with both IPv4 and IPv6 flags"));
			goto discard;
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled &&
			    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
			     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled &&
		    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
		     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;
	unsigned int retry = 3;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

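	/* Try to flush the queue, retrying up to three times; each attempt
	 * polls for completion for up to two seconds (20 x 100 ms).
	 */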
960*3c838a9fSAndrew Rybchenko 	while (rxq->flush_state != SFXGE_FLUSH_DONE && retry != 0) {
961e948693eSPhilip Paeps 		rxq->flush_state = SFXGE_FLUSH_PENDING;
962e948693eSPhilip Paeps 
963763cab71SAndrew Rybchenko 		SFXGE_EVQ_UNLOCK(evq);
964e948693eSPhilip Paeps 
965*3c838a9fSAndrew Rybchenko 		/* Flush the receive queue */
966*3c838a9fSAndrew Rybchenko 		if (efx_rx_qflush(rxq->common) != 0) {
967*3c838a9fSAndrew Rybchenko 			SFXGE_EVQ_LOCK(evq);
968*3c838a9fSAndrew Rybchenko 			rxq->flush_state = SFXGE_FLUSH_FAILED;
969*3c838a9fSAndrew Rybchenko 			break;
970*3c838a9fSAndrew Rybchenko 		}
971*3c838a9fSAndrew Rybchenko 
972e948693eSPhilip Paeps 		count = 0;
973e948693eSPhilip Paeps 		do {
974e948693eSPhilip Paeps 			/* Spin for 100 ms */
975e948693eSPhilip Paeps 			DELAY(100000);
976e948693eSPhilip Paeps 
977e948693eSPhilip Paeps 			if (rxq->flush_state != SFXGE_FLUSH_PENDING)
978e948693eSPhilip Paeps 				break;
979e948693eSPhilip Paeps 
980e948693eSPhilip Paeps 		} while (++count < 20);
981e948693eSPhilip Paeps 
982763cab71SAndrew Rybchenko 		SFXGE_EVQ_LOCK(evq);
983e948693eSPhilip Paeps 
984*3c838a9fSAndrew Rybchenko 		if (rxq->flush_state == SFXGE_FLUSH_PENDING) {
985*3c838a9fSAndrew Rybchenko 			/* Flush timeout - neither done nor failed */
986*3c838a9fSAndrew Rybchenko 			log(LOG_ERR, "%s: Cannot flush Rx queue %u\n",
987*3c838a9fSAndrew Rybchenko 			    device_get_nameunit(sc->dev), index);
988e948693eSPhilip Paeps 			rxq->flush_state = SFXGE_FLUSH_DONE;
989*3c838a9fSAndrew Rybchenko 		}
990*3c838a9fSAndrew Rybchenko 		retry--;
991*3c838a9fSAndrew Rybchenko 	}
992*3c838a9fSAndrew Rybchenko 	if (rxq->flush_state == SFXGE_FLUSH_FAILED) {
993*3c838a9fSAndrew Rybchenko 		log(LOG_ERR, "%s: Flushing Rx queue %u failed\n",
994*3c838a9fSAndrew Rybchenko 		    device_get_nameunit(sc->dev), index);
995*3c838a9fSAndrew Rybchenko 		rxq->flush_state = SFXGE_FLUSH_DONE;
996*3c838a9fSAndrew Rybchenko 	}
997e948693eSPhilip Paeps 
998e948693eSPhilip Paeps 	rxq->pending = rxq->added;
999e948693eSPhilip Paeps 	sfxge_rx_qcomplete(rxq, B_TRUE);
1000e948693eSPhilip Paeps 
1001e948693eSPhilip Paeps 	KASSERT(rxq->completed == rxq->pending,
1002e948693eSPhilip Paeps 	    ("rxq->completed != rxq->pending"));
1003e948693eSPhilip Paeps 
1004e948693eSPhilip Paeps 	rxq->added = 0;
1005*3c838a9fSAndrew Rybchenko 	rxq->pushed = 0;
1006e948693eSPhilip Paeps 	rxq->pending = 0;
1007e948693eSPhilip Paeps 	rxq->completed = 0;
1008e948693eSPhilip Paeps 	rxq->loopback = 0;
1009e948693eSPhilip Paeps 
1010e948693eSPhilip Paeps 	/* Destroy the common code receive queue. */
1011e948693eSPhilip Paeps 	efx_rx_qdestroy(rxq->common);
1012e948693eSPhilip Paeps 
1013e948693eSPhilip Paeps 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
1014385b1d8eSGeorge V. Neville-Neil 	    EFX_RXQ_NBUFS(sc->rxq_entries));
1015e948693eSPhilip Paeps 
1016763cab71SAndrew Rybchenko 	SFXGE_EVQ_UNLOCK(evq);
1017e948693eSPhilip Paeps }
1018e948693eSPhilip Paeps 
1019e948693eSPhilip Paeps static int
1020e948693eSPhilip Paeps sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
1021e948693eSPhilip Paeps {
1022e948693eSPhilip Paeps 	struct sfxge_rxq *rxq;
1023e948693eSPhilip Paeps 	efsys_mem_t *esmp;
1024e948693eSPhilip Paeps 	struct sfxge_evq *evq;
1025e948693eSPhilip Paeps 	int rc;
1026e948693eSPhilip Paeps 
1027*3c838a9fSAndrew Rybchenko 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
1028*3c838a9fSAndrew Rybchenko 
1029e948693eSPhilip Paeps 	rxq = sc->rxq[index];
1030e948693eSPhilip Paeps 	esmp = &rxq->mem;
1031e948693eSPhilip Paeps 	evq = sc->evq[index];
1032e948693eSPhilip Paeps 
1033e948693eSPhilip Paeps 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1034e948693eSPhilip Paeps 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1035e948693eSPhilip Paeps 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
1036e948693eSPhilip Paeps 	    ("evq->init_state != SFXGE_EVQ_STARTED"));
1037e948693eSPhilip Paeps 
1038e948693eSPhilip Paeps 	/* Program the buffer table. */
1039e948693eSPhilip Paeps 	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
1040385b1d8eSGeorge V. Neville-Neil 	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
1041385b1d8eSGeorge V. Neville-Neil 		return (rc);
1042e948693eSPhilip Paeps 
1043e948693eSPhilip Paeps 	/* Create the common code receive queue. */
1044e948693eSPhilip Paeps 	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
1045385b1d8eSGeorge V. Neville-Neil 	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
1046e948693eSPhilip Paeps 	    &rxq->common)) != 0)
1047e948693eSPhilip Paeps 		goto fail;
1048e948693eSPhilip Paeps 
1049763cab71SAndrew Rybchenko 	SFXGE_EVQ_LOCK(evq);
1050e948693eSPhilip Paeps 
1051e948693eSPhilip Paeps 	/* Enable the receive queue. */
1052e948693eSPhilip Paeps 	efx_rx_qenable(rxq->common);
1053e948693eSPhilip Paeps 
1054e948693eSPhilip Paeps 	rxq->init_state = SFXGE_RXQ_STARTED;
1055*3c838a9fSAndrew Rybchenko 	rxq->flush_state = SFXGE_FLUSH_REQUIRED;
1056e948693eSPhilip Paeps 
1057e948693eSPhilip Paeps 	/* Try to fill the queue from the pool. */
1058385b1d8eSGeorge V. Neville-Neil 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);
1059e948693eSPhilip Paeps 
1060763cab71SAndrew Rybchenko 	SFXGE_EVQ_UNLOCK(evq);
1061e948693eSPhilip Paeps 
1062e948693eSPhilip Paeps 	return (0);
1063e948693eSPhilip Paeps 
1064e948693eSPhilip Paeps fail:
1065e948693eSPhilip Paeps 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
1066385b1d8eSGeorge V. Neville-Neil 	    EFX_RXQ_NBUFS(sc->rxq_entries));
1067385b1d8eSGeorge V. Neville-Neil 	return (rc);
1068e948693eSPhilip Paeps }
1069e948693eSPhilip Paeps 
1070e948693eSPhilip Paeps void
1071e948693eSPhilip Paeps sfxge_rx_stop(struct sfxge_softc *sc)
1072e948693eSPhilip Paeps {
1073e948693eSPhilip Paeps 	int index;
1074e948693eSPhilip Paeps 
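	/*
	 * Detach the default RX queue from the MAC filters first so that
	 * no new traffic is delivered while the queues are stopped.
	 */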
1075*3c838a9fSAndrew Rybchenko 	efx_mac_filter_default_rxq_clear(sc->enp);
1076*3c838a9fSAndrew Rybchenko 
1077e948693eSPhilip Paeps 	/* Stop the receive queue(s) */
1078133366a6SAndrew Rybchenko 	index = sc->rxq_count;
1079e948693eSPhilip Paeps 	while (--index >= 0)
1080e948693eSPhilip Paeps 		sfxge_rx_qstop(sc, index);
1081e948693eSPhilip Paeps 
1082e948693eSPhilip Paeps 	sc->rx_prefix_size = 0;
1083e948693eSPhilip Paeps 	sc->rx_buffer_size = 0;
1084e948693eSPhilip Paeps 
1085e948693eSPhilip Paeps 	efx_rx_fini(sc->enp);
1086e948693eSPhilip Paeps }
1087e948693eSPhilip Paeps 
1088e948693eSPhilip Paeps int
1089e948693eSPhilip Paeps sfxge_rx_start(struct sfxge_softc *sc)
1090e948693eSPhilip Paeps {
1091e948693eSPhilip Paeps 	struct sfxge_intr *intr;
1092*3c838a9fSAndrew Rybchenko 	const efx_nic_cfg_t *encp;
1093*3c838a9fSAndrew Rybchenko 	size_t hdrlen, align, reserved;
1094e948693eSPhilip Paeps 	int index;
1095e948693eSPhilip Paeps 	int rc;
1096e948693eSPhilip Paeps 
1097e948693eSPhilip Paeps 	intr = &sc->intr;
1098e948693eSPhilip Paeps 
1099e948693eSPhilip Paeps 	/* Initialize the common code receive module. */
1100e948693eSPhilip Paeps 	if ((rc = efx_rx_init(sc->enp)) != 0)
1101e948693eSPhilip Paeps 		return (rc);
1102e948693eSPhilip Paeps 
1103*3c838a9fSAndrew Rybchenko 	encp = efx_nic_cfg_get(sc->enp);
1105*3c838a9fSAndrew Rybchenko 
1106e948693eSPhilip Paeps 	/* Calculate the receive packet buffer size. */
1104*3c838a9fSAndrew Rybchenko 	sc->rx_buffer_size = EFX_MAC_PDU(sc->ifnet->if_mtu);
1107*3c838a9fSAndrew Rybchenko 	sc->rx_prefix_size = encp->enc_rx_prefix_size;
1108*3c838a9fSAndrew Rybchenko 
1109*3c838a9fSAndrew Rybchenko 	/* Ensure IP headers are 32-bit aligned */
1110*3c838a9fSAndrew Rybchenko 	hdrlen = sc->rx_prefix_size + sizeof (struct ether_header);
1111*3c838a9fSAndrew Rybchenko 	sc->rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
1112*3c838a9fSAndrew Rybchenko 
1113*3c838a9fSAndrew Rybchenko 	sc->rx_buffer_size += sc->rx_buffer_align;
1114*3c838a9fSAndrew Rybchenko 
1115*3c838a9fSAndrew Rybchenko 	/* Align end of packet buffer for RX DMA end padding */
1116*3c838a9fSAndrew Rybchenko 	align = MAX(1, encp->enc_rx_buf_align_end);
1117*3c838a9fSAndrew Rybchenko 	EFSYS_ASSERT(ISP2(align));
1118*3c838a9fSAndrew Rybchenko 	sc->rx_buffer_size = P2ROUNDUP(sc->rx_buffer_size, align);
1119*3c838a9fSAndrew Rybchenko 
1120*3c838a9fSAndrew Rybchenko 	/*
1121*3c838a9fSAndrew Rybchenko 	 * Standard mbuf zones only guarantee pointer-size alignment;
1122*3c838a9fSAndrew Rybchenko 	 * we need extra space to align to the cache line.
1123*3c838a9fSAndrew Rybchenko 	 */
1124*3c838a9fSAndrew Rybchenko 	reserved = sc->rx_buffer_size + CACHE_LINE_SIZE;
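	/*
	 * For example (values are illustrative): with a 16-byte RX
	 * prefix and the 14-byte Ethernet header, hdrlen is 30, so
	 * rx_buffer_align = P2ROUNDUP(30, 4) - 30 = 2 bytes of leading
	 * padding place the IP header on a 32-bit boundary; the extra
	 * CACHE_LINE_SIZE bytes let the fill path round the buffer start
	 * up to a cache-line boundary within the cluster.
	 */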
1125e948693eSPhilip Paeps 
1126e948693eSPhilip Paeps 	/* Select zone for packet buffers */
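	/*
	 * Use the smallest standard cluster zone that fits: MCLBYTES is
	 * 2KB, MJUMPAGESIZE is one page (typically 4KB), MJUM9BYTES is
	 * 9KB and MJUM16BYTES is 16KB.
	 */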
1127*3c838a9fSAndrew Rybchenko 	if (reserved <= MCLBYTES)
1128e948693eSPhilip Paeps 		sc->rx_buffer_zone = zone_clust;
1129*3c838a9fSAndrew Rybchenko 	else if (reserved <= MJUMPAGESIZE)
1130e948693eSPhilip Paeps 		sc->rx_buffer_zone = zone_jumbop;
1131*3c838a9fSAndrew Rybchenko 	else if (reserved <= MJUM9BYTES)
1132e948693eSPhilip Paeps 		sc->rx_buffer_zone = zone_jumbo9;
1133e948693eSPhilip Paeps 	else
1134e948693eSPhilip Paeps 		sc->rx_buffer_zone = zone_jumbo16;
1135e948693eSPhilip Paeps 
1136e948693eSPhilip Paeps 	/*
1137e948693eSPhilip Paeps 	 * Set up the scale table.  Enable all hash types and hash insertion.
1138e948693eSPhilip Paeps 	 */
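	/*
	 * Indirection table entries are assigned round-robin so that RSS
	 * hash values spread flows evenly across the receive queues.
	 */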
1139e948693eSPhilip Paeps 	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
1140133366a6SAndrew Rybchenko 		sc->rx_indir_table[index] = index % sc->rxq_count;
1141e948693eSPhilip Paeps 	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
1142e948693eSPhilip Paeps 				       SFXGE_RX_SCALE_MAX)) != 0)
1143e948693eSPhilip Paeps 		goto fail;
1144e948693eSPhilip Paeps 	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
1145e948693eSPhilip Paeps 	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
1146e948693eSPhilip Paeps 	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
1147e948693eSPhilip Paeps 
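	/*
	 * Program the Toeplitz hash key (toep_key is defined earlier in
	 * this file) used to compute the inserted hash values.
	 */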
1148*3c838a9fSAndrew Rybchenko 	if ((rc = efx_rx_scale_key_set(sc->enp, toep_key,
1149e948693eSPhilip Paeps 				       sizeof(toep_key))) != 0)
1150e948693eSPhilip Paeps 		goto fail;
1151e948693eSPhilip Paeps 
1152e948693eSPhilip Paeps 	/* Start the receive queue(s). */
1153133366a6SAndrew Rybchenko 	for (index = 0; index < sc->rxq_count; index++) {
1154e948693eSPhilip Paeps 		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1155e948693eSPhilip Paeps 			goto fail2;
1156e948693eSPhilip Paeps 	}
1157e948693eSPhilip Paeps 
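	/*
	 * Direct traffic that matches no other filter to the first RXQ;
	 * the final argument tells the common code whether RSS spreading
	 * across multiple queues is in use.
	 */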
1158*3c838a9fSAndrew Rybchenko 	rc = efx_mac_filter_default_rxq_set(sc->enp, sc->rxq[0]->common,
1159*3c838a9fSAndrew Rybchenko 					    sc->intr.n_alloc > 1);
1160*3c838a9fSAndrew Rybchenko 	if (rc != 0)
1161*3c838a9fSAndrew Rybchenko 		goto fail3;
1162*3c838a9fSAndrew Rybchenko 
1163e948693eSPhilip Paeps 	return (0);
1164e948693eSPhilip Paeps 
1165*3c838a9fSAndrew Rybchenko fail3:
1166e948693eSPhilip Paeps fail2:
1167e948693eSPhilip Paeps 	while (--index >= 0)
1168e948693eSPhilip Paeps 		sfxge_rx_qstop(sc, index);
1169e948693eSPhilip Paeps 
1170e948693eSPhilip Paeps fail:
1171e948693eSPhilip Paeps 	efx_rx_fini(sc->enp);
1172e948693eSPhilip Paeps 
1173e948693eSPhilip Paeps 	return (rc);
1174e948693eSPhilip Paeps }
1175e948693eSPhilip Paeps 
117618daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO
117718daa0eeSAndrew Rybchenko 
1178e948693eSPhilip Paeps static void
1178e948693eSPhilip Paeps sfxge_lro_init(struct sfxge_rxq *rxq)
1179e948693eSPhilip Paeps {
1180e948693eSPhilip Paeps 	struct sfxge_lro_state *st = &rxq->lro;
1181e948693eSPhilip Paeps 	unsigned i;
1182e948693eSPhilip Paeps 
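	/*
	 * The table size is a power of 2 so that a connection hash can
	 * be reduced to a bucket index with a simple mask.
	 */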
1183e948693eSPhilip Paeps 	st->conns_mask = lro_table_size - 1;
1184e948693eSPhilip Paeps 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1185e948693eSPhilip Paeps 		("lro_table_size must be a power of 2"));
1186e948693eSPhilip Paeps 	st->sc = rxq->sc;
1187e948693eSPhilip Paeps 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1188e948693eSPhilip Paeps 			   M_SFXGE, M_WAITOK);
1189e948693eSPhilip Paeps 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1190e948693eSPhilip Paeps 			     M_SFXGE, M_WAITOK);
1191e948693eSPhilip Paeps 	for (i = 0; i <= st->conns_mask; ++i) {
1192e948693eSPhilip Paeps 		TAILQ_INIT(&st->conns[i]);
1193e948693eSPhilip Paeps 		st->conns_n[i] = 0;
1194e948693eSPhilip Paeps 	}
1195e948693eSPhilip Paeps 	LIST_INIT(&st->active_conns);
1196e948693eSPhilip Paeps 	TAILQ_INIT(&st->free_conns);
1197e948693eSPhilip Paeps }
1198e948693eSPhilip Paeps 
1199e948693eSPhilip Paeps static void
1199e948693eSPhilip Paeps sfxge_lro_fini(struct sfxge_rxq *rxq)
1200e948693eSPhilip Paeps {
1201e948693eSPhilip Paeps 	struct sfxge_lro_state *st = &rxq->lro;
1202e948693eSPhilip Paeps 	struct sfxge_lro_conn *c;
1203e948693eSPhilip Paeps 	unsigned i;
1204e948693eSPhilip Paeps 
1205e948693eSPhilip Paeps 	/* Return cleanly if sfxge_lro_init() has not been called. */
1206e948693eSPhilip Paeps 	if (st->conns == NULL)
1207e948693eSPhilip Paeps 		return;
1208e948693eSPhilip Paeps 
1209e948693eSPhilip Paeps 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1210e948693eSPhilip Paeps 
1211e948693eSPhilip Paeps 	for (i = 0; i <= st->conns_mask; ++i) {
1212e948693eSPhilip Paeps 		while (!TAILQ_EMPTY(&st->conns[i])) {
1213e948693eSPhilip Paeps 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1214e948693eSPhilip Paeps 			sfxge_lro_drop(rxq, c);
1215e948693eSPhilip Paeps 		}
1216e948693eSPhilip Paeps 	}
1217e948693eSPhilip Paeps 
1218e948693eSPhilip Paeps 	while (!TAILQ_EMPTY(&st->free_conns)) {
1219e948693eSPhilip Paeps 		c = TAILQ_FIRST(&st->free_conns);
1220e948693eSPhilip Paeps 		TAILQ_REMOVE(&st->free_conns, c, link);
1221e948693eSPhilip Paeps 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1222e948693eSPhilip Paeps 		free(c, M_SFXGE);
1223e948693eSPhilip Paeps 	}
1224e948693eSPhilip Paeps 
1225e948693eSPhilip Paeps 	free(st->conns_n, M_SFXGE);
1226e948693eSPhilip Paeps 	free(st->conns, M_SFXGE);
1227e948693eSPhilip Paeps 	st->conns = NULL;
1228e948693eSPhilip Paeps }
1229e948693eSPhilip Paeps 
123018daa0eeSAndrew Rybchenko #else
123118daa0eeSAndrew Rybchenko 
123218daa0eeSAndrew Rybchenko static void
123318daa0eeSAndrew Rybchenko sfxge_lro_init(struct sfxge_rxq *rxq)
123418daa0eeSAndrew Rybchenko {
123518daa0eeSAndrew Rybchenko }
123618daa0eeSAndrew Rybchenko 
123718daa0eeSAndrew Rybchenko static void
123818daa0eeSAndrew Rybchenko sfxge_lro_fini(struct sfxge_rxq *rxq)
123918daa0eeSAndrew Rybchenko {
124018daa0eeSAndrew Rybchenko }
124118daa0eeSAndrew Rybchenko 
124218daa0eeSAndrew Rybchenko #endif	/* SFXGE_LRO */
124318daa0eeSAndrew Rybchenko 
1244e948693eSPhilip Paeps static void
1245e948693eSPhilip Paeps sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1246e948693eSPhilip Paeps {
1247e948693eSPhilip Paeps 	struct sfxge_rxq *rxq;
1248e948693eSPhilip Paeps 
1249e948693eSPhilip Paeps 	rxq = sc->rxq[index];
1250e948693eSPhilip Paeps 
1251e948693eSPhilip Paeps 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1252e948693eSPhilip Paeps 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1253e948693eSPhilip Paeps 
1254e948693eSPhilip Paeps 	/* Free the context array and the flow table. */
1255e948693eSPhilip Paeps 	free(rxq->queue, M_SFXGE);
1256e948693eSPhilip Paeps 	sfxge_lro_fini(rxq);
1257e948693eSPhilip Paeps 
1258e948693eSPhilip Paeps 	/* Release DMA memory. */
1259e948693eSPhilip Paeps 	sfxge_dma_free(&rxq->mem);
1260e948693eSPhilip Paeps 
1261e948693eSPhilip Paeps 	sc->rxq[index] = NULL;
1262e948693eSPhilip Paeps 
1263e948693eSPhilip Paeps 	free(rxq, M_SFXGE);
1264e948693eSPhilip Paeps }
1265e948693eSPhilip Paeps 
1266e948693eSPhilip Paeps static int
1267e948693eSPhilip Paeps sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1268e948693eSPhilip Paeps {
1269e948693eSPhilip Paeps 	struct sfxge_rxq *rxq;
1270e948693eSPhilip Paeps 	struct sfxge_evq *evq;
1271e948693eSPhilip Paeps 	efsys_mem_t *esmp;
1272e948693eSPhilip Paeps 	int rc;
1273e948693eSPhilip Paeps 
1274133366a6SAndrew Rybchenko 	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));
1275e948693eSPhilip Paeps 
1276e948693eSPhilip Paeps 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1277e948693eSPhilip Paeps 	rxq->sc = sc;
1278e948693eSPhilip Paeps 	rxq->index = index;
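	/*
	 * The ring size is a power of 2, so descriptor indices wrap by
	 * masking with ptr_mask; refill_threshold is the fill level
	 * below which the completion path tops the ring back up.
	 */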
1279385b1d8eSGeorge V. Neville-Neil 	rxq->entries = sc->rxq_entries;
1280385b1d8eSGeorge V. Neville-Neil 	rxq->ptr_mask = rxq->entries - 1;
1281385b1d8eSGeorge V. Neville-Neil 	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);
1282e948693eSPhilip Paeps 
1283e948693eSPhilip Paeps 	sc->rxq[index] = rxq;
1284e948693eSPhilip Paeps 	esmp = &rxq->mem;
1285e948693eSPhilip Paeps 
1286e948693eSPhilip Paeps 	evq = sc->evq[index];
1287e948693eSPhilip Paeps 
1288e948693eSPhilip Paeps 	/* Allocate and zero DMA space. */
1289385b1d8eSGeorge V. Neville-Neil 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
1290e948693eSPhilip Paeps 		return (rc);
1291e948693eSPhilip Paeps 
1292e948693eSPhilip Paeps 	/* Allocate buffer table entries. */
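	/*
	 * These are only reserved here; they are programmed into the NIC
	 * by efx_sram_buf_tbl_set() in sfxge_rx_qstart().
	 */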
1293385b1d8eSGeorge V. Neville-Neil 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
1294e948693eSPhilip Paeps 				 &rxq->buf_base_id);
1295e948693eSPhilip Paeps 
1296e948693eSPhilip Paeps 	/* Allocate the context array and the flow table. */
1297385b1d8eSGeorge V. Neville-Neil 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
1298e948693eSPhilip Paeps 	    M_SFXGE, M_WAITOK | M_ZERO);
1299e948693eSPhilip Paeps 	sfxge_lro_init(rxq);
1300e948693eSPhilip Paeps 
1301fd90e2edSJung-uk Kim 	callout_init(&rxq->refill_callout, 1);
1302e948693eSPhilip Paeps 
1303e948693eSPhilip Paeps 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1304e948693eSPhilip Paeps 
1305e948693eSPhilip Paeps 	return (0);
1306e948693eSPhilip Paeps }
1307e948693eSPhilip Paeps 
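/*
 * Per-queue receive statistics exported via sysctl.  Each entry pairs a
 * name with the field's byte offset within struct sfxge_rxq so that a
 * single handler can sum any statistic across all queues.
 */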
1308e948693eSPhilip Paeps static const struct {
1309e948693eSPhilip Paeps 	const char *name;
1310e948693eSPhilip Paeps 	size_t offset;
1311e948693eSPhilip Paeps } sfxge_rx_stats[] = {
1312e948693eSPhilip Paeps #define	SFXGE_RX_STAT(name, member) \
1313e948693eSPhilip Paeps 	{ #name, offsetof(struct sfxge_rxq, member) }
131418daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO
1315e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1316e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1317e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1318e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1319e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1320e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1321e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1322e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
132318daa0eeSAndrew Rybchenko #endif
1324e948693eSPhilip Paeps };
1325e948693eSPhilip Paeps 
1326e948693eSPhilip Paeps static int
1327e948693eSPhilip Paeps sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1328e948693eSPhilip Paeps {
1329e948693eSPhilip Paeps 	struct sfxge_softc *sc = arg1;
1330e948693eSPhilip Paeps 	unsigned int id = arg2;
1331e948693eSPhilip Paeps 	unsigned int sum, index;
1332e948693eSPhilip Paeps 
1333e948693eSPhilip Paeps 	/* Sum across all RX queues */
1334e948693eSPhilip Paeps 	sum = 0;
1335133366a6SAndrew Rybchenko 	for (index = 0; index < sc->rxq_count; index++)
1336e948693eSPhilip Paeps 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1337e948693eSPhilip Paeps 					 sfxge_rx_stats[id].offset);
1338e948693eSPhilip Paeps 
1339b7b0edd1SGeorge V. Neville-Neil 	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1340e948693eSPhilip Paeps }
1341e948693eSPhilip Paeps 
1342e948693eSPhilip Paeps static void
1343e948693eSPhilip Paeps sfxge_rx_stat_init(struct sfxge_softc *sc)
1344e948693eSPhilip Paeps {
1345e948693eSPhilip Paeps 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1346e948693eSPhilip Paeps 	struct sysctl_oid_list *stat_list;
1347e948693eSPhilip Paeps 	unsigned int id;
1348e948693eSPhilip Paeps 
1349e948693eSPhilip Paeps 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1350e948693eSPhilip Paeps 
1351612d8e28SAndrew Rybchenko 	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
1352e948693eSPhilip Paeps 		SYSCTL_ADD_PROC(
1353e948693eSPhilip Paeps 			ctx, stat_list,
1354e948693eSPhilip Paeps 			OID_AUTO, sfxge_rx_stats[id].name,
1355e948693eSPhilip Paeps 			CTLTYPE_UINT|CTLFLAG_RD,
1356e948693eSPhilip Paeps 			sc, id, sfxge_rx_stat_handler, "IU",
1357e948693eSPhilip Paeps 			"");
1358e948693eSPhilip Paeps 	}
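	/*
	 * Each statistic is now readable as an unsigned integer sysctl
	 * under the device's statistics node, e.g. (assuming unit 0)
	 * something like: sysctl dev.sfxge.0.stats.lro_merges
	 */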
1359e948693eSPhilip Paeps }
1360e948693eSPhilip Paeps 
1361e948693eSPhilip Paeps void
1362e948693eSPhilip Paeps sfxge_rx_fini(struct sfxge_softc *sc)
1363e948693eSPhilip Paeps {
1364e948693eSPhilip Paeps 	int index;
1365e948693eSPhilip Paeps 
1366133366a6SAndrew Rybchenko 	index = sc->rxq_count;
1367e948693eSPhilip Paeps 	while (--index >= 0)
1368e948693eSPhilip Paeps 		sfxge_rx_qfini(sc, index);
1369133366a6SAndrew Rybchenko 
1370133366a6SAndrew Rybchenko 	sc->rxq_count = 0;
1371e948693eSPhilip Paeps }
1372e948693eSPhilip Paeps 
1373e948693eSPhilip Paeps int
1374e948693eSPhilip Paeps sfxge_rx_init(struct sfxge_softc *sc)
1375e948693eSPhilip Paeps {
1376e948693eSPhilip Paeps 	struct sfxge_intr *intr;
1377e948693eSPhilip Paeps 	int index;
1378e948693eSPhilip Paeps 	int rc;
1379e948693eSPhilip Paeps 
138018daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO
1381245d1576SAndrew Rybchenko 	if (!ISP2(lro_table_size)) {
1382245d1576SAndrew Rybchenko 		log(LOG_ERR, "%s=%u must be a power of 2\n",
1383245d1576SAndrew Rybchenko 		    SFXGE_LRO_PARAM(table_size), lro_table_size);
1384245d1576SAndrew Rybchenko 		rc = EINVAL;
1385245d1576SAndrew Rybchenko 		goto fail_lro_table_size;
1386245d1576SAndrew Rybchenko 	}
1387245d1576SAndrew Rybchenko 
1388e948693eSPhilip Paeps 	if (lro_idle_ticks == 0)
1389e948693eSPhilip Paeps 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
139018daa0eeSAndrew Rybchenko #endif
1391e948693eSPhilip Paeps 
1392e948693eSPhilip Paeps 	intr = &sc->intr;
1393e948693eSPhilip Paeps 
1394133366a6SAndrew Rybchenko 	sc->rxq_count = intr->n_alloc;
1395133366a6SAndrew Rybchenko 
1396e948693eSPhilip Paeps 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1397e948693eSPhilip Paeps 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1398e948693eSPhilip Paeps 
1399e948693eSPhilip Paeps 	/* Initialize the receive queue(s) - one per interrupt. */
1400133366a6SAndrew Rybchenko 	for (index = 0; index < sc->rxq_count; index++) {
1401e948693eSPhilip Paeps 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1402e948693eSPhilip Paeps 			goto fail;
1403e948693eSPhilip Paeps 	}
1404e948693eSPhilip Paeps 
1405e948693eSPhilip Paeps 	sfxge_rx_stat_init(sc);
1406e948693eSPhilip Paeps 
1407e948693eSPhilip Paeps 	return (0);
1408e948693eSPhilip Paeps 
1409e948693eSPhilip Paeps fail:
1410e948693eSPhilip Paeps 	/* Tear down the receive queue(s). */
1411e948693eSPhilip Paeps 	while (--index >= 0)
1412e948693eSPhilip Paeps 		sfxge_rx_qfini(sc, index);
1413e948693eSPhilip Paeps 
1414133366a6SAndrew Rybchenko 	sc->rxq_count = 0;
1415245d1576SAndrew Rybchenko 
1416245d1576SAndrew Rybchenko #ifdef SFXGE_LRO
1417245d1576SAndrew Rybchenko fail_lro_table_size:
1418245d1576SAndrew Rybchenko #endif
1419e948693eSPhilip Paeps 	return (rc);
1420e948693eSPhilip Paeps }
1421