xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision 245d157651bd6e7e588b1e4a1ae25c1f7bbd30fa)
/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
#include <sys/syslog.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
	    &lro_loss_packets, 0,
	    "Number of packets with payload that must arrive in-order "
	    "following loss before a connection is eligible for LRO");
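
/*
 * Example (values are hypothetical): assuming SFXGE_PARAM() expands to
 * the "hw.sfxge." prefix, the tunables above can be set from
 * loader.conf(5) before the driver attaches, e.g.
 *
 *	hw.sfxge.lro.table_size="256"
 *	hw.sfxge.lro.idle_ticks="200"
 */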

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

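/*
 * Flush-state callbacks, invoked from event processing when the hardware
 * reports the outcome of an RX queue flush request.
 */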
void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

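/* Key for the Toeplitz hash used in receive-side scaling (RSS) */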
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

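/*
 * Callout handler: post a software "refill" event to the queue's event
 * queue, so that the actual refill runs in event-processing context.
 */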
static void
sfxge_rx_post_refill(void *arg)
{
	struct sfxge_rxq *rxq = arg;
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	uint16_t magic;

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	magic = SFXGE_MAGIC_RX_QREFILL | index;

	/* This is guaranteed due to the start/stop order of rx and ev */
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq not started"));
	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));
	efx_ev_qpost(evq->common, magic);
}

static void
sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
{
	/* Initially retry after 100 ms, but back off in case of
	 * repeated failures as we probably have to wait for the
	 * administrator to raise the pool limit. */
	if (retrying)
		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
	else
		rxq->refill_delay = hz / 10;

	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
			     sfxge_rx_post_refill, rxq);
}

static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
	struct mb_args args;
	struct mbuf *m;

	/* Allocate mbuf structure */
	args.flags = M_PKTHDR;
	args.type = MT_DATA;
	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);

	/* Allocate (and attach) packet buffer */
	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
		uma_zfree(zone_mbuf, m);
		m = NULL;
	}

	return (m);
}

#define	SFXGE_REFILL_BATCH  64

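/*
 * Fill the RX queue towards the given target, allocating and DMA-mapping
 * mbufs and posting their addresses to the hardware in batches of up to
 * SFXGE_REFILL_BATCH descriptors.  If mbuf allocation fails, schedule a
 * retry via the refill callout.
 */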
static void
sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
{
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	prefetch_read_many(sc->enp);
	prefetch_read_many(rxq->common);

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	rxfill = rxq->added - rxq->completed;
	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));

	if (ntodo == 0)
		return;

	batch = 0;
	mblksize = sc->rx_buffer_size;
	while (ntodo-- > 0) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;
		bus_dma_segment_t seg;
		struct mbuf *m;

		id = (rxq->added + batch) & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));

		rx_desc->flags = EFX_DISCARD;
		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
		if (m == NULL)
			break;
		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
		addr[batch++] = seg.ds_addr;

		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(rxq->common, addr, mblksize, batch,
			    rxq->completed, rxq->added);
			rxq->added += batch;
			batch = 0;
		}
	}

	if (ntodo != 0)
		sfxge_rx_schedule_refill(rxq, retrying);

	if (batch != 0) {
		efx_rx_qpost(rxq->common, addr, mblksize, batch,
		    rxq->completed, rxq->added);
		rxq->added += batch;
	}

	/* Make the descriptors visible to the hardware */
	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
			BUS_DMASYNC_PREWRITE);

	efx_rx_qpush(rxq->common, rxq->added);
}

void
sfxge_rx_qrefill(struct sfxge_rxq *rxq)
{

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	/* Make sure the queue is full */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
}

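/* Pass a received mbuf up to the network stack */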
static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

#ifdef SFXGE_HAVE_MQ
	/* The hash covers a 4-tuple for TCP only */
	if (rx_desc->flags & EFX_PKT_TCP) {
		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
						       mtod(m, uint8_t *));
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
#endif
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

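/*
 * Deliver a coalesced LRO packet: restore the IP length fields to network
 * byte order (recomputing the IPv4 header checksum), copy the window, ACK
 * and options from the last merged TCP header, and pass the mbuf chain up
 * the stack.
 */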
static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

#ifdef SFXGE_HAVE_MQ
	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
#endif
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
		("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

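/*
 * Append an in-order segment to an existing LRO chain: extend the mbuf
 * chain and the IP payload length, carry over TH_PUSH, and remember the
 * latest TCP header for use at delivery time.
 */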
static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

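/*
 * Start tracking a new connection, recycling state from the free list if
 * possible.  The connection is keyed by hardware hash, L2 ID and TCP
 * ports; nothing is tracked if the hash bucket is already at
 * lro_chain_max.
 */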
static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
				      mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;
		if ((iph->ip_p - IPPROTO_TCP) |
		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;
		if (iph->ip6_nxt != IPPROTO_TCP)
			goto deliver_now;
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

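/*
 * Process completed receive descriptors: discard errored and loopback
 * packets, pass the rest into LRO or directly up the stack (pipelined so
 * that each packet is prefetched one iteration before it is handled), and
 * top up the queue if it has fallen below the refill threshold.
 */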
void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled)
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled)
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

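/*
 * Stop an RX queue: initiate a hardware flush and poll for completion
 * (retrying if the flush fails), drain any remaining completions, then
 * destroy the common-code queue and release its buffer table entries.
 */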
static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

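/*
 * Start an RX queue: program the buffer table, create and enable the
 * common-code queue, and perform the initial fill from the mbuf pool.
 */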
static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}

int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
				       SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

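/*
 * Allocate and initialise the LRO hash table and connection lists for an
 * RX queue.  lro_table_size must be a power of 2 since it is used as a
 * bit mask.
 */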
1068e948693eSPhilip Paeps static void sfxge_lro_init(struct sfxge_rxq *rxq)
1069e948693eSPhilip Paeps {
1070e948693eSPhilip Paeps 	struct sfxge_lro_state *st = &rxq->lro;
1071e948693eSPhilip Paeps 	unsigned i;
1072e948693eSPhilip Paeps 
1073e948693eSPhilip Paeps 	st->conns_mask = lro_table_size - 1;
1074e948693eSPhilip Paeps 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1075e948693eSPhilip Paeps 		("lro_table_size must be a power of 2"));
1076e948693eSPhilip Paeps 	st->sc = rxq->sc;
1077e948693eSPhilip Paeps 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1078e948693eSPhilip Paeps 			   M_SFXGE, M_WAITOK);
1079e948693eSPhilip Paeps 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1080e948693eSPhilip Paeps 			     M_SFXGE, M_WAITOK);
1081e948693eSPhilip Paeps 	for (i = 0; i <= st->conns_mask; ++i) {
1082e948693eSPhilip Paeps 		TAILQ_INIT(&st->conns[i]);
1083e948693eSPhilip Paeps 		st->conns_n[i] = 0;
1084e948693eSPhilip Paeps 	}
1085e948693eSPhilip Paeps 	LIST_INIT(&st->active_conns);
1086e948693eSPhilip Paeps 	TAILQ_INIT(&st->free_conns);
1087e948693eSPhilip Paeps }
1088e948693eSPhilip Paeps 
1089e948693eSPhilip Paeps static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1090e948693eSPhilip Paeps {
1091e948693eSPhilip Paeps 	struct sfxge_lro_state *st = &rxq->lro;
1092e948693eSPhilip Paeps 	struct sfxge_lro_conn *c;
1093e948693eSPhilip Paeps 	unsigned i;
1094e948693eSPhilip Paeps 
1095e948693eSPhilip Paeps 	/* Return cleanly if sfxge_lro_init() has not been called. */
1096e948693eSPhilip Paeps 	if (st->conns == NULL)
1097e948693eSPhilip Paeps 		return;
1098e948693eSPhilip Paeps 
1099e948693eSPhilip Paeps 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1100e948693eSPhilip Paeps 
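	/* Drop any connection still present in a hash bucket. */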
1101e948693eSPhilip Paeps 	for (i = 0; i <= st->conns_mask; ++i) {
1102e948693eSPhilip Paeps 		while (!TAILQ_EMPTY(&st->conns[i])) {
1103e948693eSPhilip Paeps 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1104e948693eSPhilip Paeps 			sfxge_lro_drop(rxq, c);
1105e948693eSPhilip Paeps 		}
1106e948693eSPhilip Paeps 	}
1107e948693eSPhilip Paeps 
1108e948693eSPhilip Paeps 	while (!TAILQ_EMPTY(&st->free_conns)) {
1109e948693eSPhilip Paeps 		c = TAILQ_FIRST(&st->free_conns);
1110e948693eSPhilip Paeps 		TAILQ_REMOVE(&st->free_conns, c, link);
1111e948693eSPhilip Paeps 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1112e948693eSPhilip Paeps 		free(c, M_SFXGE);
1113e948693eSPhilip Paeps 	}
1114e948693eSPhilip Paeps 
1115e948693eSPhilip Paeps 	free(st->conns_n, M_SFXGE);
1116e948693eSPhilip Paeps 	free(st->conns, M_SFXGE);
1117e948693eSPhilip Paeps 	st->conns = NULL;
1118e948693eSPhilip Paeps }
1119e948693eSPhilip Paeps 
112018daa0eeSAndrew Rybchenko #else
112118daa0eeSAndrew Rybchenko 
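/* LRO support is compiled out: provide empty stubs so callers need no #ifdefs. */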
112218daa0eeSAndrew Rybchenko static void
112318daa0eeSAndrew Rybchenko sfxge_lro_init(struct sfxge_rxq *rxq)
112418daa0eeSAndrew Rybchenko {
112518daa0eeSAndrew Rybchenko }
112618daa0eeSAndrew Rybchenko 
112718daa0eeSAndrew Rybchenko static void
112818daa0eeSAndrew Rybchenko sfxge_lro_fini(struct sfxge_rxq *rxq)
112918daa0eeSAndrew Rybchenko {
113018daa0eeSAndrew Rybchenko }
113118daa0eeSAndrew Rybchenko 
113218daa0eeSAndrew Rybchenko #endif	/* SFXGE_LRO */
113318daa0eeSAndrew Rybchenko 
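/*
 * Free the software state of one receive queue; the reverse of
 * sfxge_rx_qinit().
 */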
1134e948693eSPhilip Paeps static void
1135e948693eSPhilip Paeps sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1136e948693eSPhilip Paeps {
1137e948693eSPhilip Paeps 	struct sfxge_rxq *rxq;
1138e948693eSPhilip Paeps 
1139e948693eSPhilip Paeps 	rxq = sc->rxq[index];
1140e948693eSPhilip Paeps 
1141e948693eSPhilip Paeps 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1142e948693eSPhilip Paeps 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1143e948693eSPhilip Paeps 
1144e948693eSPhilip Paeps 	/* Free the context array and the flow table. */
1145e948693eSPhilip Paeps 	free(rxq->queue, M_SFXGE);
1146e948693eSPhilip Paeps 	sfxge_lro_fini(rxq);
1147e948693eSPhilip Paeps 
1148e948693eSPhilip Paeps 	/* Release DMA memory. */
1149e948693eSPhilip Paeps 	sfxge_dma_free(&rxq->mem);
1150e948693eSPhilip Paeps 
1151e948693eSPhilip Paeps 	sc->rxq[index] = NULL;
1152e948693eSPhilip Paeps 
1153e948693eSPhilip Paeps 	free(rxq, M_SFXGE);
1154e948693eSPhilip Paeps }
1155e948693eSPhilip Paeps 
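/*
 * Allocate and initialize the software state for one receive queue: the
 * descriptor ring's DMA memory, buffer table entries, the software
 * descriptor array and, when enabled, the LRO state.
 */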
1156e948693eSPhilip Paeps static int
1157e948693eSPhilip Paeps sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1158e948693eSPhilip Paeps {
1159e948693eSPhilip Paeps 	struct sfxge_rxq *rxq;
1160e948693eSPhilip Paeps 	struct sfxge_evq *evq;
1161e948693eSPhilip Paeps 	efsys_mem_t *esmp;
1162e948693eSPhilip Paeps 	int rc;
1163e948693eSPhilip Paeps 
1164133366a6SAndrew Rybchenko 	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));
1165e948693eSPhilip Paeps 
1166e948693eSPhilip Paeps 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1167e948693eSPhilip Paeps 	rxq->sc = sc;
1168e948693eSPhilip Paeps 	rxq->index = index;
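	/*
	 * The ring size is a power of 2, so ptr_mask cheaply wraps ring
	 * indices; refill is triggered once the number of posted buffers
	 * falls below the threshold (see RX_REFILL_THRESHOLD above).
	 */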
1169385b1d8eSGeorge V. Neville-Neil 	rxq->entries = sc->rxq_entries;
1170385b1d8eSGeorge V. Neville-Neil 	rxq->ptr_mask = rxq->entries - 1;
1171385b1d8eSGeorge V. Neville-Neil 	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);
1172e948693eSPhilip Paeps 
1173e948693eSPhilip Paeps 	sc->rxq[index] = rxq;
1174e948693eSPhilip Paeps 	esmp = &rxq->mem;
1175e948693eSPhilip Paeps 
1176e948693eSPhilip Paeps 	evq = sc->evq[index];
1177e948693eSPhilip Paeps 
1178e948693eSPhilip Paeps 	/* Allocate and zero DMA space. */
1179385b1d8eSGeorge V. Neville-Neil 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
1180e948693eSPhilip Paeps 		return (rc);
1181e948693eSPhilip Paeps 
1182e948693eSPhilip Paeps 	/* Allocate buffer table entries. */
1183385b1d8eSGeorge V. Neville-Neil 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
1184e948693eSPhilip Paeps 				 &rxq->buf_base_id);
1185e948693eSPhilip Paeps 
1186e948693eSPhilip Paeps 	/* Allocate the context array and the flow table. */
1187385b1d8eSGeorge V. Neville-Neil 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
1188e948693eSPhilip Paeps 	    M_SFXGE, M_WAITOK | M_ZERO);
1189e948693eSPhilip Paeps 	sfxge_lro_init(rxq);
1190e948693eSPhilip Paeps 
1191e948693eSPhilip Paeps 	callout_init(&rxq->refill_callout, B_TRUE);
1192e948693eSPhilip Paeps 
1193e948693eSPhilip Paeps 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1194e948693eSPhilip Paeps 
1195e948693eSPhilip Paeps 	return (0);
1196e948693eSPhilip Paeps }
1197e948693eSPhilip Paeps 
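/*
 * Per-queue statistics exported through sysctl: each entry maps a name to
 * the offset of an unsigned int counter within struct sfxge_rxq.
 */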
1198e948693eSPhilip Paeps static const struct {
1199e948693eSPhilip Paeps 	const char *name;
1200e948693eSPhilip Paeps 	size_t offset;
1201e948693eSPhilip Paeps } sfxge_rx_stats[] = {
1202e948693eSPhilip Paeps #define	SFXGE_RX_STAT(name, member) \
1203e948693eSPhilip Paeps 	{ #name, offsetof(struct sfxge_rxq, member) }
120418daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO
1205e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1206e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1207e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1208e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1209e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1210e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1211e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1212e948693eSPhilip Paeps 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
121318daa0eeSAndrew Rybchenko #endif
1214e948693eSPhilip Paeps };
1215e948693eSPhilip Paeps 
1216e948693eSPhilip Paeps static int
1217e948693eSPhilip Paeps sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1218e948693eSPhilip Paeps {
1219e948693eSPhilip Paeps 	struct sfxge_softc *sc = arg1;
1220e948693eSPhilip Paeps 	unsigned int id = arg2;
1221e948693eSPhilip Paeps 	unsigned int sum, index;
1222e948693eSPhilip Paeps 
1223e948693eSPhilip Paeps 	/* Sum across all RX queues */
1224e948693eSPhilip Paeps 	sum = 0;
1225133366a6SAndrew Rybchenko 	for (index = 0; index < sc->rxq_count; index++)
1226e948693eSPhilip Paeps 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1227e948693eSPhilip Paeps 					 sfxge_rx_stats[id].offset);
1228e948693eSPhilip Paeps 
1229b7b0edd1SGeorge V. Neville-Neil 	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1230e948693eSPhilip Paeps }
1231e948693eSPhilip Paeps 
1232e948693eSPhilip Paeps static void
1233e948693eSPhilip Paeps sfxge_rx_stat_init(struct sfxge_softc *sc)
1234e948693eSPhilip Paeps {
1235e948693eSPhilip Paeps 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1236e948693eSPhilip Paeps 	struct sysctl_oid_list *stat_list;
1237e948693eSPhilip Paeps 	unsigned int id;
1238e948693eSPhilip Paeps 
1239e948693eSPhilip Paeps 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1240e948693eSPhilip Paeps 
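	/*
	 * Create one read-only sysctl node per statistic; the index into
	 * sfxge_rx_stats[] is passed to the handler as arg2.
	 */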
1241612d8e28SAndrew Rybchenko 	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
1242e948693eSPhilip Paeps 		SYSCTL_ADD_PROC(
1243e948693eSPhilip Paeps 			ctx, stat_list,
1244e948693eSPhilip Paeps 			OID_AUTO, sfxge_rx_stats[id].name,
1245e948693eSPhilip Paeps 			CTLTYPE_UINT|CTLFLAG_RD,
1246e948693eSPhilip Paeps 			sc, id, sfxge_rx_stat_handler, "IU",
1247e948693eSPhilip Paeps 			"");
1248e948693eSPhilip Paeps 	}
1249e948693eSPhilip Paeps }
1250e948693eSPhilip Paeps 
1251e948693eSPhilip Paeps void
1252e948693eSPhilip Paeps sfxge_rx_fini(struct sfxge_softc *sc)
1253e948693eSPhilip Paeps {
1254e948693eSPhilip Paeps 	int index;
1255e948693eSPhilip Paeps 
1256133366a6SAndrew Rybchenko 	index = sc->rxq_count;
1257e948693eSPhilip Paeps 	while (--index >= 0)
1258e948693eSPhilip Paeps 		sfxge_rx_qfini(sc, index);
1259133366a6SAndrew Rybchenko 
1260133366a6SAndrew Rybchenko 	sc->rxq_count = 0;
1261e948693eSPhilip Paeps }
1262e948693eSPhilip Paeps 
1263e948693eSPhilip Paeps int
1264e948693eSPhilip Paeps sfxge_rx_init(struct sfxge_softc *sc)
1265e948693eSPhilip Paeps {
1266e948693eSPhilip Paeps 	struct sfxge_intr *intr;
1267e948693eSPhilip Paeps 	int index;
1268e948693eSPhilip Paeps 	int rc;
1269e948693eSPhilip Paeps 
127018daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO
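	/*
	 * The LRO connection hash is indexed by masking with
	 * (lro_table_size - 1), so reject sizes that are not a power of 2.
	 */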
1271*245d1576SAndrew Rybchenko 	if (!ISP2(lro_table_size)) {
1272*245d1576SAndrew Rybchenko 		log(LOG_ERR, "%s=%u must be a power of 2\n",
1273*245d1576SAndrew Rybchenko 		    SFXGE_LRO_PARAM(table_size), lro_table_size);
1274*245d1576SAndrew Rybchenko 		rc = EINVAL;
1275*245d1576SAndrew Rybchenko 		goto fail_lro_table_size;
1276*245d1576SAndrew Rybchenko 	}
1277*245d1576SAndrew Rybchenko 
1278e948693eSPhilip Paeps 	if (lro_idle_ticks == 0)
1279e948693eSPhilip Paeps 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
128018daa0eeSAndrew Rybchenko #endif
1281e948693eSPhilip Paeps 
1282e948693eSPhilip Paeps 	intr = &sc->intr;
1283e948693eSPhilip Paeps 
1284133366a6SAndrew Rybchenko 	sc->rxq_count = intr->n_alloc;
1285133366a6SAndrew Rybchenko 
1286e948693eSPhilip Paeps 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1287e948693eSPhilip Paeps 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1288e948693eSPhilip Paeps 
1289e948693eSPhilip Paeps 	/* Initialize the receive queue(s) - one per interrupt. */
1290133366a6SAndrew Rybchenko 	for (index = 0; index < sc->rxq_count; index++) {
1291e948693eSPhilip Paeps 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1292e948693eSPhilip Paeps 			goto fail;
1293e948693eSPhilip Paeps 	}
1294e948693eSPhilip Paeps 
1295e948693eSPhilip Paeps 	sfxge_rx_stat_init(sc);
1296e948693eSPhilip Paeps 
1297e948693eSPhilip Paeps 	return (0);
1298e948693eSPhilip Paeps 
1299e948693eSPhilip Paeps fail:
1300e948693eSPhilip Paeps 	/* Tear down the receive queue(s). */
1301e948693eSPhilip Paeps 	while (--index >= 0)
1302e948693eSPhilip Paeps 		sfxge_rx_qfini(sc, index);
1303e948693eSPhilip Paeps 
1304133366a6SAndrew Rybchenko 	sc->rxq_count = 0;
1305*245d1576SAndrew Rybchenko 
1306*245d1576SAndrew Rybchenko #ifdef SFXGE_LRO
1307*245d1576SAndrew Rybchenko fail_lro_table_size:
1308*245d1576SAndrew Rybchenko #endif
1309e948693eSPhilip Paeps 	return (rc);
1310e948693eSPhilip Paeps }
1311