/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

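/*
 * Refill threshold: top up the queue once the fill level drops below
 * 90% of the queue limit.
 */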
#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
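	/*
	 * OR together the word-wise differences: the result is zero
	 * if and only if every word, and hence the whole address, matches.
	 */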
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

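/*
 * Toeplitz hash key used to seed receive-side scaling (RSS); programmed
 * into the controller by sfxge_rx_start().
 */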
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

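/*
 * Request a deferred refill: post a software event tagged with the queue
 * index so that the refill itself runs in event queue context.
 */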
static void
sfxge_rx_post_refill(void *arg)
{
	struct sfxge_rxq *rxq = arg;
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	uint16_t magic;

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	magic = SFXGE_MAGIC_RX_QREFILL | index;

	/* This is guaranteed due to the start/stop order of rx and ev */
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq not started"));
	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));
	efx_ev_qpost(evq->common, magic);
}

static void
sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
{
	/* Initially retry after 100 ms, but back off in case of
	 * repeated failures as we probably have to wait for the
	 * administrator to raise the pool limit. */
	if (retrying)
		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
	else
		rxq->refill_delay = hz / 10;

	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
			     sfxge_rx_post_refill, rxq);
}

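/*
 * Allocate an mbuf header and attach a packet buffer from the adapter's
 * receive buffer zone; returns NULL if either allocation fails.
 */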
static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
	struct mb_args args;
	struct mbuf *m;

	/* Allocate mbuf structure */
	args.flags = M_PKTHDR;
	args.type = MT_DATA;
	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);

	/* Allocate (and attach) packet buffer */
	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
		uma_zfree(zone_mbuf, m);
		m = NULL;
	}

	return (m);
}

#define	SFXGE_REFILL_BATCH  64

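/*
 * Fill the queue up to the given target with freshly allocated receive
 * buffers.  DMA addresses are accumulated and posted to the hardware in
 * batches of SFXGE_REFILL_BATCH; if allocation runs dry a retry is
 * scheduled via sfxge_rx_schedule_refill().
 */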
static void
sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
{
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	prefetch_read_many(sc->enp);
	prefetch_read_many(rxq->common);

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (rxq->init_state != SFXGE_RXQ_STARTED)
		return;

	rxfill = rxq->added - rxq->completed;
	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));

	if (ntodo == 0)
		return;

	batch = 0;
	mblksize = sc->rx_buffer_size;
	while (ntodo-- > 0) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;
		bus_dma_segment_t seg;
		struct mbuf *m;

		id = (rxq->added + batch) & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));

		rx_desc->flags = EFX_DISCARD;
		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
		if (m == NULL)
			break;
		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
		addr[batch++] = seg.ds_addr;

		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(rxq->common, addr, mblksize, batch,
			    rxq->completed, rxq->added);
			rxq->added += batch;
			batch = 0;
		}
	}

	if (ntodo != 0)
		sfxge_rx_schedule_refill(rxq, retrying);

	if (batch != 0) {
		efx_rx_qpost(rxq->common, addr, mblksize, batch,
		    rxq->completed, rxq->added);
		rxq->added += batch;
	}

	/* Make the descriptors visible to the hardware */
	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
			BUS_DMASYNC_PREWRITE);

	efx_rx_qpush(rxq->common, rxq->added);
}

void
sfxge_rx_qrefill(struct sfxge_rxq *rxq)
{

	if (rxq->init_state != SFXGE_RXQ_STARTED)
		return;

	/* Make sure the queue is full */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
}

static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

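/*
 * Deliver a single received packet to the stack: convert the hardware
 * checksum flags to mbuf checksum flags, record the Toeplitz flow hash
 * for TCP packets, strip the hardware prefix and pass the mbuf up.
 */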
static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

#ifdef SFXGE_HAVE_MQ
	/* The hash covers a 4-tuple for TCP only */
	if (rx_desc->flags & EFX_PKT_TCP) {
		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
						       mtod(m, uint8_t *));
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
#endif
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

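/*
 * Deliver a merged LRO chain to the stack.  The IP length fields were
 * kept in host order while merging; convert them back to network order,
 * recompute the IPv4 header checksum and copy the most recent window,
 * ACK and options from the last segment into the head segment.
 */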
static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

#ifdef SFXGE_HAVE_MQ
	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
#endif
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
		("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

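/*
 * Append an in-order segment to an existing LRO chain, updating the head
 * segment's IP payload length and accumulating the TH_PUSH flag.  The
 * chain is delivered early if one more segment could overflow the 16-bit
 * IP length field.
 */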
static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

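/*
 * Begin a new LRO chain with this packet as the head segment.  The IP
 * length fields are converted to host order while merging is in
 * progress; sfxge_lro_deliver() converts them back before delivery.
 */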
static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

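/*
 * Create tracking state for a connection not already in the hash table,
 * reusing an entry from the free list when one is available.
 */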
static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
				      mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;
		if ((iph->ip_p - IPPROTO_TCP) |
		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;
		if (iph->ip6_nxt != IPPROTO_TCP)
			goto deliver_now;
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

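/*
 * Process completed receive descriptors between rxq->completed and
 * rxq->pending: discard errored and loopback packets, pass the rest to
 * LRO or directly to the stack (pipelined so that the previous packet
 * is handed off while the next is prefetched), then top up the queue if
 * it has fallen below the refill threshold.
 */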
void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (rxq->init_state != SFXGE_RXQ_STARTED)
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled)
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled)
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

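/*
 * Stop a receive queue: flush it and wait (polling in 100 ms steps, up
 * to 2 seconds per attempt) for the flush to complete, retrying if the
 * flush is reported as failed, then complete any outstanding
 * descriptors and destroy the common code queue.
 */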
static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}

int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
				       SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

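/*
 * Allocate and initialise the LRO hash table (lro_table_size buckets of
 * connection lists) and the free and active connection lists for a queue.
 */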
static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
		("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);
	(void)memset(esmp->esm_base, 0, EFX_RXQ_SIZE(sc->rxq_entries));

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
	    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define	SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
};

static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0;
	     id < sizeof(sfxge_rx_stats) / sizeof(sfxge_rx_stats[0]);
	     id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}

void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
}

int
sfxge_rx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	if (lro_idle_ticks == 0)
		lro_idle_ticks = hz / 10 + 1; /* 100 ms */

	intr = &sc->intr;

	sc->rxq_count = intr->n_alloc;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the receive queue(s) - one per interrupt. */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
			goto fail;
	}

	sfxge_rx_stat_init(sc);

	return (0);

fail:
	/* Tear down the receive queue(s). */
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
	return (rc);
}