xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision ce3adf4362fcca6a43e500b2531f0038adbfbd21)
1 /*-
2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/smp.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/limits.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_vlan_var.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp.h>
48 
49 #include <machine/in_cksum.h>
50 
51 #include "common/efx.h"
52 
53 
54 #include "sfxge.h"
55 #include "sfxge_rx.h"
56 
/*
 * Ring fill watermarks: RX_REFILL_THRESHOLD is 90% of the usable ring
 * (EFX_RXQ_LIMIT(SFXGE_NDESCS)); RX_REFILL_THRESHOLD_2 is half of that.
 */
#define RX_REFILL_THRESHOLD (EFX_RXQ_LIMIT(SFXGE_NDESCS) * 9 / 10)
#define RX_REFILL_THRESHOLD_2 (RX_REFILL_THRESHOLD / 2)

/*
 * Number of buckets in the LRO connection hash table.  This has to be a
 * power of 2 because a bucket is selected by masking the hash with
 * (size - 1).  More buckets let us accelerate more concurrent streams.
 */
static unsigned int lro_table_size = 128;

/*
 * Upper bound on the number of connections tracked in one hash bucket.
 * Long chains make lookups slow enough to cancel out the benefit of LRO.
 */
static unsigned int lro_chain_max = 20;

/*
 * Maximum time (in ticks) that a connection may stay idle before its LRO
 * state is discarded.
 */
static unsigned int lro_idle_ticks; /* initialised in sfxge_rx_init() */

/*
 * Number of in-order packets carrying payload that must be seen before a
 * connection becomes eligible for LRO.  Coalescing segments while the
 * sender is in slow-start lowers the ACK rate, which can hurt performance.
 */
static int lro_slow_start_packets = 2000;

/*
 * Number of in-order packets carrying payload that must be seen after a
 * loss before a connection becomes eligible for LRO again.  As above, a
 * reduced ACK rate can hurt a sender that is recovering from loss.
 */
static int lro_loss_packets = 20;

/*
 * Flag bits stored in sfxge_lro_conn::l2_id alongside the VLAN ID; they
 * must not collide with EVL_VLID_MASK.
 */
#define SFXGE_LRO_L2_ID_VLAN 0x4000
#define SFXGE_LRO_L2_ID_IPV6 0x8000
#define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
94 
95 /* Compare IPv6 addresses, avoiding conditional branches */
96 static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left,
97 					    const struct in6_addr *right)
98 {
99 #if LONG_BIT == 64
100 	const uint64_t *left64 = (const uint64_t *)left;
101 	const uint64_t *right64 = (const uint64_t *)right;
102 	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
103 #else
104 	return (left->s6_addr32[0] - right->s6_addr32[0]) |
105 	       (left->s6_addr32[1] - right->s6_addr32[1]) |
106 	       (left->s6_addr32[2] - right->s6_addr32[2]) |
107 	       (left->s6_addr32[3] - right->s6_addr32[3]);
108 #endif
109 }
110 
111 void
112 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
113 {
114 
115 	rxq->flush_state = SFXGE_FLUSH_DONE;
116 }
117 
118 void
119 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
120 {
121 
122 	rxq->flush_state = SFXGE_FLUSH_FAILED;
123 }
124 
/*
 * 40-byte Toeplitz hash key that sfxge_rx_start() programs into the
 * adapter for receive-side scaling.
 * NOTE(review): the bytes look like the sample key published with
 * Microsoft's RSS verification suite -- confirm before relying on that.
 */
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda,  0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d,  0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb,  0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3,  0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b,  0xbe, 0xac, 0x01, 0xfa
};
132 
133 static void
134 sfxge_rx_post_refill(void *arg)
135 {
136 	struct sfxge_rxq *rxq = arg;
137 	struct sfxge_softc *sc;
138 	unsigned int index;
139 	struct sfxge_evq *evq;
140 	uint16_t magic;
141 
142 	sc = rxq->sc;
143 	index = rxq->index;
144 	evq = sc->evq[index];
145 
146 	magic = SFXGE_MAGIC_RX_QREFILL | index;
147 
148 	/* This is guaranteed due to the start/stop order of rx and ev */
149 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
150 	    ("evq not started"));
151 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
152 	    ("rxq not started"));
153 	efx_ev_qpost(evq->common, magic);
154 }
155 
156 static void
157 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
158 {
159 	/* Initially retry after 100 ms, but back off in case of
160 	 * repeated failures as we probably have to wait for the
161 	 * administrator to raise the pool limit. */
162 	if (retrying)
163 		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
164 	else
165 		rxq->refill_delay = hz / 10;
166 
167 	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
168 			     sfxge_rx_post_refill, rxq);
169 }
170 
171 static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
172 {
173 	struct mb_args args;
174 	struct mbuf *m;
175 
176 	/* Allocate mbuf structure */
177 	args.flags = M_PKTHDR;
178 	args.type = MT_DATA;
179 	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
180 
181 	/* Allocate (and attach) packet buffer */
182 	if (m && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
183 		uma_zfree(zone_mbuf, m);
184 		m = NULL;
185 	}
186 
187 	return m;
188 }
189 
190 #define	SFXGE_REFILL_BATCH  64
191 
192 static void
193 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
194 {
195 	struct sfxge_softc *sc;
196 	unsigned int index;
197 	struct sfxge_evq *evq;
198 	unsigned int batch;
199 	unsigned int rxfill;
200 	unsigned int mblksize;
201 	int ntodo;
202 	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
203 
204 	sc = rxq->sc;
205 	index = rxq->index;
206 	evq = sc->evq[index];
207 
208 	prefetch_read_many(sc->enp);
209 	prefetch_read_many(rxq->common);
210 
211 	mtx_assert(&evq->lock, MA_OWNED);
212 
213 	if (rxq->init_state != SFXGE_RXQ_STARTED)
214 		return;
215 
216 	rxfill = rxq->added - rxq->completed;
217 	KASSERT(rxfill <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
218 	    ("rxfill > EFX_RXQ_LIMIT(SFXGE_NDESCS)"));
219 	ntodo = min(EFX_RXQ_LIMIT(SFXGE_NDESCS) - rxfill, target);
220 	KASSERT(ntodo <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
221 	    ("ntodo > EFX_RQX_LIMIT(SFXGE_NDESCS)"));
222 
223 	if (ntodo == 0)
224 		return;
225 
226 	batch = 0;
227 	mblksize = sc->rx_buffer_size;
228 	while (ntodo-- > 0) {
229 		unsigned int id;
230 		struct sfxge_rx_sw_desc *rx_desc;
231 		bus_dma_segment_t seg;
232 		struct mbuf *m;
233 
234 		id = (rxq->added + batch) & (SFXGE_NDESCS - 1);
235 		rx_desc = &rxq->queue[id];
236 		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
237 
238 		rx_desc->flags = EFX_DISCARD;
239 		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
240 		if (m == NULL)
241 			break;
242 		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
243 		addr[batch++] = seg.ds_addr;
244 
245 		if (batch == SFXGE_REFILL_BATCH) {
246 			efx_rx_qpost(rxq->common, addr, mblksize, batch,
247 			    rxq->completed, rxq->added);
248 			rxq->added += batch;
249 			batch = 0;
250 		}
251 	}
252 
253 	if (ntodo != 0)
254 		sfxge_rx_schedule_refill(rxq, retrying);
255 
256 	if (batch != 0) {
257 		efx_rx_qpost(rxq->common, addr, mblksize, batch,
258 		    rxq->completed, rxq->added);
259 		rxq->added += batch;
260 	}
261 
262 	/* Make the descriptors visible to the hardware */
263 	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
264 			BUS_DMASYNC_PREWRITE);
265 
266 	efx_rx_qpush(rxq->common, rxq->added);
267 }
268 
269 void
270 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
271 {
272 
273 	if (rxq->init_state != SFXGE_RXQ_STARTED)
274 		return;
275 
276 	/* Make sure the queue is full */
277 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_TRUE);
278 }
279 
280 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
281 {
282 	struct ifnet *ifp = sc->ifnet;
283 
284 	m->m_pkthdr.rcvif = ifp;
285 	m->m_pkthdr.csum_data = 0xffff;
286 	ifp->if_input(ifp, m);
287 }
288 
289 static void
290 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
291 {
292 	struct mbuf *m = rx_desc->mbuf;
293 	int csum_flags;
294 
295 	/* Convert checksum flags */
296 	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
297 		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
298 	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
299 		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
300 
301 #ifdef SFXGE_HAVE_MQ
302 	/* The hash covers a 4-tuple for TCP only */
303 	if (rx_desc->flags & EFX_PKT_TCP) {
304 		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
305 						       mtod(m, uint8_t *));
306 		m->m_flags |= M_FLOWID;
307 	}
308 #endif
309 	m->m_data += sc->rx_prefix_size;
310 	m->m_len = rx_desc->size - sc->rx_prefix_size;
311 	m->m_pkthdr.len = m->m_len;
312 	m->m_pkthdr.csum_flags = csum_flags;
313 	__sfxge_rx_deliver(sc, rx_desc->mbuf);
314 
315 	rx_desc->flags = EFX_DISCARD;
316 	rx_desc->mbuf = NULL;
317 }
318 
319 static void
320 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
321 {
322 	struct sfxge_softc *sc = st->sc;
323 	struct mbuf *m = c->mbuf;
324 	struct tcphdr *c_th;
325 	int csum_flags;
326 
327 	KASSERT(m, ("no mbuf to deliver"));
328 
329 	++st->n_bursts;
330 
331 	/* Finish off packet munging and recalculate IP header checksum. */
332 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
333 		struct ip *iph = c->nh;
334 		iph->ip_len = htons(iph->ip_len);
335 		iph->ip_sum = 0;
336 		iph->ip_sum = in_cksum_hdr(iph);
337 		c_th = (struct tcphdr *)(iph + 1);
338 		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
339 			      CSUM_IP_CHECKED | CSUM_IP_VALID);
340 	} else {
341 		struct ip6_hdr *iph = c->nh;
342 		iph->ip6_plen = htons(iph->ip6_plen);
343 		c_th = (struct tcphdr *)(iph + 1);
344 		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
345 	}
346 
347 	c_th->th_win = c->th_last->th_win;
348 	c_th->th_ack = c->th_last->th_ack;
349 	if (c_th->th_off == c->th_last->th_off) {
350 		/* Copy TCP options (take care to avoid going negative). */
351 		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
352 		memcpy(c_th + 1, c->th_last + 1, optlen);
353 	}
354 
355 #ifdef SFXGE_HAVE_MQ
356 	m->m_pkthdr.flowid = c->conn_hash;
357 	m->m_flags |= M_FLOWID;
358 #endif
359 	m->m_pkthdr.csum_flags = csum_flags;
360 	__sfxge_rx_deliver(sc, m);
361 
362 	c->mbuf = NULL;
363 	c->delivered = 1;
364 }
365 
366 /* Drop the given connection, and add it to the free list. */
367 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
368 {
369 	unsigned bucket;
370 
371 	KASSERT(!c->mbuf, ("found orphaned mbuf"));
372 
373 	if (c->next_buf.mbuf) {
374 		sfxge_rx_deliver(rxq->sc, &c->next_buf);
375 		LIST_REMOVE(c, active_link);
376 	}
377 
378 	bucket = c->conn_hash & rxq->lro.conns_mask;
379 	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
380 	--rxq->lro.conns_n[bucket];
381 	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
382 	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
383 }
384 
385 /* Stop tracking connections that have gone idle in order to keep hash
386  * chains short.
387  */
388 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
389 {
390 	struct sfxge_lro_conn *c;
391 	unsigned i;
392 
393 	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
394 		("found active connections"));
395 
396 	rxq->lro.last_purge_ticks = now;
397 	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
398 		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
399 			continue;
400 
401 		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
402 		if (now - c->last_pkt_ticks > lro_idle_ticks) {
403 			++rxq->lro.n_drop_idle;
404 			sfxge_lro_drop(rxq, c);
405 		}
406 	}
407 }
408 
409 static void
410 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
411 		struct mbuf *mbuf, struct tcphdr *th)
412 {
413 	struct tcphdr *c_th;
414 
415 	/* Tack the new mbuf onto the chain. */
416 	KASSERT(!mbuf->m_next, ("mbuf already chained"));
417 	c->mbuf_tail->m_next = mbuf;
418 	c->mbuf_tail = mbuf;
419 
420 	/* Increase length appropriately */
421 	c->mbuf->m_pkthdr.len += mbuf->m_len;
422 
423 	/* Update the connection state flags */
424 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
425 		struct ip *iph = c->nh;
426 		iph->ip_len += mbuf->m_len;
427 		c_th = (struct tcphdr *)(iph + 1);
428 	} else {
429 		struct ip6_hdr *iph = c->nh;
430 		iph->ip6_plen += mbuf->m_len;
431 		c_th = (struct tcphdr *)(iph + 1);
432 	}
433 	c_th->th_flags |= (th->th_flags & TH_PUSH);
434 	c->th_last = th;
435 	++st->n_merges;
436 
437 	/* Pass packet up now if another segment could overflow the IP
438 	 * length.
439 	 */
440 	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
441 		sfxge_lro_deliver(st, c);
442 }
443 
444 static void
445 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
446 		struct mbuf *mbuf, void *nh, struct tcphdr *th)
447 {
448 	/* Start the chain */
449 	c->mbuf = mbuf;
450 	c->mbuf_tail = c->mbuf;
451 	c->nh = nh;
452 	c->th_last = th;
453 
454 	mbuf->m_pkthdr.len = mbuf->m_len;
455 
456 	/* Mangle header fields for later processing */
457 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
458 		struct ip *iph = nh;
459 		iph->ip_len = ntohs(iph->ip_len);
460 	} else {
461 		struct ip6_hdr *iph = nh;
462 		iph->ip6_plen = ntohs(iph->ip6_plen);
463 	}
464 }
465 
466 /* Try to merge or otherwise hold or deliver (as appropriate) the
467  * packet buffered for this connection (c->next_buf).  Return a flag
468  * indicating whether the connection is still active for LRO purposes.
469  */
470 static int
471 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
472 {
473 	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
474 	char *eh = c->next_eh;
475 	int data_length, hdr_length, dont_merge;
476 	unsigned th_seq, pkt_length;
477 	struct tcphdr *th;
478 	unsigned now;
479 
480 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
481 		struct ip *iph = c->next_nh;
482 		th = (struct tcphdr *)(iph + 1);
483 		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
484 	} else {
485 		struct ip6_hdr *iph = c->next_nh;
486 		th = (struct tcphdr *)(iph + 1);
487 		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
488 	}
489 
490 	hdr_length = (char *) th + th->th_off * 4 - eh;
491 	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
492 		       hdr_length);
493 	th_seq = ntohl(th->th_seq);
494 	dont_merge = ((data_length <= 0)
495 		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
496 
497 	/* Check for options other than aligned timestamp. */
498 	if (th->th_off != 5) {
499 		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
500 		if (th->th_off == 8 &&
501 		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
502 					(TCPOPT_NOP << 16) |
503 					(TCPOPT_TIMESTAMP << 8) |
504 					TCPOLEN_TIMESTAMP)) {
505 			/* timestamp option -- okay */
506 		} else {
507 			dont_merge = 1;
508 		}
509 	}
510 
511 	if (__predict_false(th_seq != c->next_seq)) {
512 		/* Out-of-order, so start counting again. */
513 		if (c->mbuf)
514 			sfxge_lro_deliver(&rxq->lro, c);
515 		c->n_in_order_pkts -= lro_loss_packets;
516 		c->next_seq = th_seq + data_length;
517 		++rxq->lro.n_misorder;
518 		goto deliver_buf_out;
519 	}
520 	c->next_seq = th_seq + data_length;
521 
522 	now = ticks;
523 	if (now - c->last_pkt_ticks > lro_idle_ticks) {
524 		++rxq->lro.n_drop_idle;
525 		if (c->mbuf)
526 			sfxge_lro_deliver(&rxq->lro, c);
527 		sfxge_lro_drop(rxq, c);
528 		return 0;
529 	}
530 	c->last_pkt_ticks = ticks;
531 
532 	if (c->n_in_order_pkts < lro_slow_start_packets) {
533 		/* May be in slow-start, so don't merge. */
534 		++rxq->lro.n_slow_start;
535 		++c->n_in_order_pkts;
536 		goto deliver_buf_out;
537 	}
538 
539 	if (__predict_false(dont_merge)) {
540 		if (c->mbuf)
541 			sfxge_lro_deliver(&rxq->lro, c);
542 		if (th->th_flags & (TH_FIN | TH_RST)) {
543 			++rxq->lro.n_drop_closed;
544 			sfxge_lro_drop(rxq, c);
545 			return 0;
546 		}
547 		goto deliver_buf_out;
548 	}
549 
550 	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
551 
552 	if (__predict_true(c->mbuf != NULL)) {
553 		/* Remove headers and any padding */
554 		rx_buf->mbuf->m_data += hdr_length;
555 		rx_buf->mbuf->m_len = data_length;
556 
557 		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
558 	} else {
559 		/* Remove any padding */
560 		rx_buf->mbuf->m_len = pkt_length;
561 
562 		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
563 	}
564 
565 	rx_buf->mbuf = NULL;
566 	return 1;
567 
568  deliver_buf_out:
569 	sfxge_rx_deliver(rxq->sc, rx_buf);
570 	return 1;
571 }
572 
573 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
574 			       uint16_t l2_id, void *nh, struct tcphdr *th)
575 {
576 	unsigned bucket = conn_hash & st->conns_mask;
577 	struct sfxge_lro_conn *c;
578 
579 	if (st->conns_n[bucket] >= lro_chain_max) {
580 		++st->n_too_many;
581 		return;
582 	}
583 
584 	if (!TAILQ_EMPTY(&st->free_conns)) {
585 		c = TAILQ_FIRST(&st->free_conns);
586 		TAILQ_REMOVE(&st->free_conns, c, link);
587 	} else {
588 		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
589 		if (c == NULL)
590 			return;
591 		c->mbuf = NULL;
592 		c->next_buf.mbuf = NULL;
593 	}
594 
595 	/* Create the connection tracking data */
596 	++st->conns_n[bucket];
597 	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
598 	c->l2_id = l2_id;
599 	c->conn_hash = conn_hash;
600 	c->source = th->th_sport;
601 	c->dest = th->th_dport;
602 	c->n_in_order_pkts = 0;
603 	c->last_pkt_ticks = *(volatile int *)&ticks;
604 	c->delivered = 0;
605 	++st->n_new_stream;
606 	/* NB. We don't initialise c->next_seq, and it doesn't matter what
607 	 * value it has.  Most likely the next packet received for this
608 	 * connection will not match -- no harm done.
609 	 */
610 }
611 
612 /* Process mbuf and decide whether to dispatch it to the stack now or
613  * later.
614  */
615 static void
616 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
617 {
618 	struct sfxge_softc *sc = rxq->sc;
619 	struct mbuf *m = rx_buf->mbuf;
620 	struct ether_header *eh;
621 	struct sfxge_lro_conn *c;
622 	uint16_t l2_id;
623 	uint16_t l3_proto;
624         void *nh;
625 	struct tcphdr *th;
626 	uint32_t conn_hash;
627 	unsigned bucket;
628 
629 	/* Get the hardware hash */
630 	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
631 				      mtod(m, uint8_t *));
632 
633 	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
634 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
635 		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
636 		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
637 			SFXGE_LRO_L2_ID_VLAN;
638 		l3_proto = veh->evl_proto;
639 		nh = veh + 1;
640 	} else {
641 		l2_id = 0;
642 		l3_proto = eh->ether_type;
643 		nh = eh + 1;
644 	}
645 
646 	/* Check whether this is a suitable packet (unfragmented
647 	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
648 	 * length, and compute a hash if necessary.  If not, return.
649 	 */
650 	if (l3_proto == htons(ETHERTYPE_IP)) {
651 		struct ip *iph = nh;
652 		if ((iph->ip_p - IPPROTO_TCP) |
653 		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
654 		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
655 			goto deliver_now;
656 		th = (struct tcphdr *)(iph + 1);
657 	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
658 		struct ip6_hdr *iph = nh;
659 		if (iph->ip6_nxt != IPPROTO_TCP)
660 			goto deliver_now;
661 		l2_id |= SFXGE_LRO_L2_ID_IPV6;
662 		th = (struct tcphdr *)(iph + 1);
663 	} else {
664 		goto deliver_now;
665 	}
666 
667 	bucket = conn_hash & rxq->lro.conns_mask;
668 
669 	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
670 		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
671 			continue;
672 		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
673 			continue;
674 		if (c->mbuf) {
675 			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
676 				struct ip *c_iph, *iph = nh;
677 				c_iph = c->nh;
678 				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
679 				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
680 					continue;
681 			} else {
682 				struct ip6_hdr *c_iph, *iph = nh;
683 				c_iph = c->nh;
684 				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
685 				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
686 					continue;
687 			}
688 		}
689 
690 		/* Re-insert at head of list to reduce lookup time. */
691 		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
692 		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
693 
694 		if (c->next_buf.mbuf) {
695 			if (!sfxge_lro_try_merge(rxq, c))
696 				goto deliver_now;
697 		} else {
698 			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
699 			    active_link);
700 		}
701 		c->next_buf = *rx_buf;
702 		c->next_eh = eh;
703 		c->next_nh = nh;
704 
705 		rx_buf->mbuf = NULL;
706 		rx_buf->flags = EFX_DISCARD;
707 		return;
708 	}
709 
710 	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
711  deliver_now:
712 	sfxge_rx_deliver(sc, rx_buf);
713 }
714 
715 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
716 {
717 	struct sfxge_lro_state *st = &rxq->lro;
718 	struct sfxge_lro_conn *c;
719 	unsigned t;
720 
721 	while (!LIST_EMPTY(&st->active_conns)) {
722 		c = LIST_FIRST(&st->active_conns);
723 		if (!c->delivered && c->mbuf)
724 			sfxge_lro_deliver(st, c);
725 		if (sfxge_lro_try_merge(rxq, c)) {
726 			if (c->mbuf)
727 				sfxge_lro_deliver(st, c);
728 			LIST_REMOVE(c, active_link);
729 		}
730 		c->delivered = 0;
731 	}
732 
733 	t = *(volatile int *)&ticks;
734 	if (__predict_false(t != st->last_purge_ticks))
735 		sfxge_lro_purge_idle(rxq, t);
736 }
737 
738 void
739 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
740 {
741 	struct sfxge_softc *sc = rxq->sc;
742 	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
743 	unsigned int index;
744 	struct sfxge_evq *evq;
745 	unsigned int completed;
746 	unsigned int level;
747 	struct mbuf *m;
748 	struct sfxge_rx_sw_desc *prev = NULL;
749 
750 	index = rxq->index;
751 	evq = sc->evq[index];
752 
753 	mtx_assert(&evq->lock, MA_OWNED);
754 
755 	completed = rxq->completed;
756 	while (completed != rxq->pending) {
757 		unsigned int id;
758 		struct sfxge_rx_sw_desc *rx_desc;
759 
760 		id = completed++ & (SFXGE_NDESCS - 1);
761 		rx_desc = &rxq->queue[id];
762 		m = rx_desc->mbuf;
763 
764 		if (rxq->init_state != SFXGE_RXQ_STARTED)
765 			goto discard;
766 
767 		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
768 			goto discard;
769 
770 		prefetch_read_many(mtod(m, caddr_t));
771 
772 		/* Check for loopback packets */
773 		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
774 		    !(rx_desc->flags & EFX_PKT_IPV6)) {
775 			struct ether_header *etherhp;
776 
777 			/*LINTED*/
778 			etherhp = mtod(m, struct ether_header *);
779 
780 			if (etherhp->ether_type ==
781 			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
782 				EFSYS_PROBE(loopback);
783 
784 				rxq->loopback++;
785 				goto discard;
786 			}
787 		}
788 
789 		/* Pass packet up the stack or into LRO (pipelined) */
790 		if (prev != NULL) {
791 			if (lro_enabled)
792 				sfxge_lro(rxq, prev);
793 			else
794 				sfxge_rx_deliver(sc, prev);
795 		}
796 		prev = rx_desc;
797 		continue;
798 
799 discard:
800 		/* Return the packet to the pool */
801 		m_free(m);
802 		rx_desc->mbuf = NULL;
803 	}
804 	rxq->completed = completed;
805 
806 	level = rxq->added - rxq->completed;
807 
808 	/* Pass last packet up the stack or into LRO */
809 	if (prev != NULL) {
810 		if (lro_enabled)
811 			sfxge_lro(rxq, prev);
812 		else
813 			sfxge_rx_deliver(sc, prev);
814 	}
815 
816 	/*
817 	 * If there are any pending flows and this is the end of the
818 	 * poll then they must be completed.
819 	 */
820 	if (eop)
821 		sfxge_lro_end_of_burst(rxq);
822 
823 	/* Top up the queue if necessary */
824 	if (level < RX_REFILL_THRESHOLD)
825 		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
826 }
827 
828 static void
829 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
830 {
831 	struct sfxge_rxq *rxq;
832 	struct sfxge_evq *evq;
833 	unsigned int count;
834 
835 	rxq = sc->rxq[index];
836 	evq = sc->evq[index];
837 
838 	mtx_lock(&evq->lock);
839 
840 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
841 	    ("rxq not started"));
842 
843 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
844 
845 	callout_stop(&rxq->refill_callout);
846 
847 again:
848 	rxq->flush_state = SFXGE_FLUSH_PENDING;
849 
850 	/* Flush the receive queue */
851 	efx_rx_qflush(rxq->common);
852 
853 	mtx_unlock(&evq->lock);
854 
855 	count = 0;
856 	do {
857 		/* Spin for 100 ms */
858 		DELAY(100000);
859 
860 		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
861 			break;
862 
863 	} while (++count < 20);
864 
865 	mtx_lock(&evq->lock);
866 
867 	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
868 		goto again;
869 
870 	rxq->flush_state = SFXGE_FLUSH_DONE;
871 
872 	rxq->pending = rxq->added;
873 	sfxge_rx_qcomplete(rxq, B_TRUE);
874 
875 	KASSERT(rxq->completed == rxq->pending,
876 	    ("rxq->completed != rxq->pending"));
877 
878 	rxq->added = 0;
879 	rxq->pending = 0;
880 	rxq->completed = 0;
881 	rxq->loopback = 0;
882 
883 	/* Destroy the common code receive queue. */
884 	efx_rx_qdestroy(rxq->common);
885 
886 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
887 	    EFX_RXQ_NBUFS(SFXGE_NDESCS));
888 
889 	mtx_unlock(&evq->lock);
890 }
891 
892 static int
893 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
894 {
895 	struct sfxge_rxq *rxq;
896 	efsys_mem_t *esmp;
897 	struct sfxge_evq *evq;
898 	int rc;
899 
900 	rxq = sc->rxq[index];
901 	esmp = &rxq->mem;
902 	evq = sc->evq[index];
903 
904 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
905 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
906 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
907 	    ("evq->init_state != SFXGE_EVQ_STARTED"));
908 
909 	/* Program the buffer table. */
910 	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
911 	    EFX_RXQ_NBUFS(SFXGE_NDESCS))) != 0)
912 		return rc;
913 
914 	/* Create the common code receive queue. */
915 	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
916 	    esmp, SFXGE_NDESCS, rxq->buf_base_id, evq->common,
917 	    &rxq->common)) != 0)
918 		goto fail;
919 
920 	mtx_lock(&evq->lock);
921 
922 	/* Enable the receive queue. */
923 	efx_rx_qenable(rxq->common);
924 
925 	rxq->init_state = SFXGE_RXQ_STARTED;
926 
927 	/* Try to fill the queue from the pool. */
928 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
929 
930 	mtx_unlock(&evq->lock);
931 
932 	return (0);
933 
934 fail:
935 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
936 	    EFX_RXQ_NBUFS(SFXGE_NDESCS));
937 	return rc;
938 }
939 
940 void
941 sfxge_rx_stop(struct sfxge_softc *sc)
942 {
943 	struct sfxge_intr *intr;
944 	int index;
945 
946 	intr = &sc->intr;
947 
948 	/* Stop the receive queue(s) */
949 	index = intr->n_alloc;
950 	while (--index >= 0)
951 		sfxge_rx_qstop(sc, index);
952 
953 	sc->rx_prefix_size = 0;
954 	sc->rx_buffer_size = 0;
955 
956 	efx_rx_fini(sc->enp);
957 }
958 
959 int
960 sfxge_rx_start(struct sfxge_softc *sc)
961 {
962 	struct sfxge_intr *intr;
963 	int index;
964 	int rc;
965 
966 	intr = &sc->intr;
967 
968 	/* Initialize the common code receive module. */
969 	if ((rc = efx_rx_init(sc->enp)) != 0)
970 		return (rc);
971 
972 	/* Calculate the receive packet buffer size. */
973 	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
974 	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
975 			      sc->rx_prefix_size);
976 
977 	/* Select zone for packet buffers */
978 	if (sc->rx_buffer_size <= MCLBYTES)
979 		sc->rx_buffer_zone = zone_clust;
980 	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
981 		sc->rx_buffer_zone = zone_jumbop;
982 	else if (sc->rx_buffer_size <= MJUM9BYTES)
983 		sc->rx_buffer_zone = zone_jumbo9;
984 	else
985 		sc->rx_buffer_zone = zone_jumbo16;
986 
987 	/*
988 	 * Set up the scale table.  Enable all hash types and hash insertion.
989 	 */
990 	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
991 		sc->rx_indir_table[index] = index % sc->intr.n_alloc;
992 	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
993 				       SFXGE_RX_SCALE_MAX)) != 0)
994 		goto fail;
995 	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
996 	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
997 	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
998 
999 	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
1000 	    sizeof(toep_key))) != 0)
1001 		goto fail;
1002 
1003 	/* Start the receive queue(s). */
1004 	for (index = 0; index < intr->n_alloc; index++) {
1005 		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1006 			goto fail2;
1007 	}
1008 
1009 	return (0);
1010 
1011 fail2:
1012 	while (--index >= 0)
1013 		sfxge_rx_qstop(sc, index);
1014 
1015 fail:
1016 	efx_rx_fini(sc->enp);
1017 
1018 	return (rc);
1019 }
1020 
1021 static void sfxge_lro_init(struct sfxge_rxq *rxq)
1022 {
1023 	struct sfxge_lro_state *st = &rxq->lro;
1024 	unsigned i;
1025 
1026 	st->conns_mask = lro_table_size - 1;
1027 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1028 		("lro_table_size must be a power of 2"));
1029 	st->sc = rxq->sc;
1030 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1031 			   M_SFXGE, M_WAITOK);
1032 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1033 			     M_SFXGE, M_WAITOK);
1034 	for (i = 0; i <= st->conns_mask; ++i) {
1035 		TAILQ_INIT(&st->conns[i]);
1036 		st->conns_n[i] = 0;
1037 	}
1038 	LIST_INIT(&st->active_conns);
1039 	TAILQ_INIT(&st->free_conns);
1040 }
1041 
1042 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1043 {
1044 	struct sfxge_lro_state *st = &rxq->lro;
1045 	struct sfxge_lro_conn *c;
1046 	unsigned i;
1047 
1048 	/* Return cleanly if sfxge_lro_init() has not been called. */
1049 	if (st->conns == NULL)
1050 		return;
1051 
1052 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1053 
1054 	for (i = 0; i <= st->conns_mask; ++i) {
1055 		while (!TAILQ_EMPTY(&st->conns[i])) {
1056 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1057 			sfxge_lro_drop(rxq, c);
1058 		}
1059 	}
1060 
1061 	while (!TAILQ_EMPTY(&st->free_conns)) {
1062 		c = TAILQ_FIRST(&st->free_conns);
1063 		TAILQ_REMOVE(&st->free_conns, c, link);
1064 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1065 		free(c, M_SFXGE);
1066 	}
1067 
1068 	free(st->conns_n, M_SFXGE);
1069 	free(st->conns, M_SFXGE);
1070 	st->conns = NULL;
1071 }
1072 
1073 static void
1074 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1075 {
1076 	struct sfxge_rxq *rxq;
1077 
1078 	rxq = sc->rxq[index];
1079 
1080 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1081 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1082 
1083 	/* Free the context array and the flow table. */
1084 	free(rxq->queue, M_SFXGE);
1085 	sfxge_lro_fini(rxq);
1086 
1087 	/* Release DMA memory. */
1088 	sfxge_dma_free(&rxq->mem);
1089 
1090 	sc->rxq[index] = NULL;
1091 
1092 	free(rxq, M_SFXGE);
1093 }
1094 
1095 static int
1096 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1097 {
1098 	struct sfxge_rxq *rxq;
1099 	struct sfxge_evq *evq;
1100 	efsys_mem_t *esmp;
1101 	int rc;
1102 
1103 	KASSERT(index < sc->intr.n_alloc, ("index >= %d", sc->intr.n_alloc));
1104 
1105 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1106 	rxq->sc = sc;
1107 	rxq->index = index;
1108 
1109 	sc->rxq[index] = rxq;
1110 	esmp = &rxq->mem;
1111 
1112 	evq = sc->evq[index];
1113 
1114 	/* Allocate and zero DMA space. */
1115 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(SFXGE_NDESCS), esmp)) != 0)
1116 		return (rc);
1117 	(void)memset(esmp->esm_base, 0, EFX_RXQ_SIZE(SFXGE_NDESCS));
1118 
1119 	/* Allocate buffer table entries. */
1120 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(SFXGE_NDESCS),
1121 				 &rxq->buf_base_id);
1122 
1123 	/* Allocate the context array and the flow table. */
1124 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * SFXGE_NDESCS,
1125 	    M_SFXGE, M_WAITOK | M_ZERO);
1126 	sfxge_lro_init(rxq);
1127 
1128 	callout_init(&rxq->refill_callout, B_TRUE);
1129 
1130 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1131 
1132 	return (0);
1133 }
1134 
1135 static const struct {
1136 	const char *name;
1137 	size_t offset;
1138 } sfxge_rx_stats[] = {
1139 #define SFXGE_RX_STAT(name, member) \
1140 	{ #name, offsetof(struct sfxge_rxq, member) }
1141 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1142 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1143 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1144 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1145 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1146 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1147 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1148 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
1149 };
1150 
1151 static int
1152 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1153 {
1154 	struct sfxge_softc *sc = arg1;
1155 	unsigned int id = arg2;
1156 	unsigned int sum, index;
1157 
1158 	/* Sum across all RX queues */
1159 	sum = 0;
1160 	for (index = 0; index < sc->intr.n_alloc; index++)
1161 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1162 					 sfxge_rx_stats[id].offset);
1163 
1164 	return SYSCTL_OUT(req, &sum, sizeof(sum));
1165 }
1166 
1167 static void
1168 sfxge_rx_stat_init(struct sfxge_softc *sc)
1169 {
1170 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1171 	struct sysctl_oid_list *stat_list;
1172 	unsigned int id;
1173 
1174 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1175 
1176 	for (id = 0;
1177 	     id < sizeof(sfxge_rx_stats) / sizeof(sfxge_rx_stats[0]);
1178 	     id++) {
1179 		SYSCTL_ADD_PROC(
1180 			ctx, stat_list,
1181 			OID_AUTO, sfxge_rx_stats[id].name,
1182 			CTLTYPE_UINT|CTLFLAG_RD,
1183 			sc, id, sfxge_rx_stat_handler, "IU",
1184 			"");
1185 	}
1186 }
1187 
1188 void
1189 sfxge_rx_fini(struct sfxge_softc *sc)
1190 {
1191 	struct sfxge_intr *intr;
1192 	int index;
1193 
1194 	intr = &sc->intr;
1195 
1196 	index = intr->n_alloc;
1197 	while (--index >= 0)
1198 		sfxge_rx_qfini(sc, index);
1199 }
1200 
1201 int
1202 sfxge_rx_init(struct sfxge_softc *sc)
1203 {
1204 	struct sfxge_intr *intr;
1205 	int index;
1206 	int rc;
1207 
1208 	if (lro_idle_ticks == 0)
1209 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
1210 
1211 	intr = &sc->intr;
1212 
1213 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1214 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1215 
1216 	/* Initialize the receive queue(s) - one per interrupt. */
1217 	for (index = 0; index < intr->n_alloc; index++) {
1218 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1219 			goto fail;
1220 	}
1221 
1222 	sfxge_rx_stat_init(sc);
1223 
1224 	return (0);
1225 
1226 fail:
1227 	/* Tear down the receive queue(s). */
1228 	while (--index >= 0)
1229 		sfxge_rx_qfini(sc, index);
1230 
1231 	return (rc);
1232 }
1233