xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision ddd5b8e9b4d8957fce018c520657cdfa4ecffad3)
1 /*-
2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/smp.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/limits.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_vlan_var.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp.h>
48 
49 #include <machine/in_cksum.h>
50 
51 #include "common/efx.h"
52 
53 
54 #include "sfxge.h"
55 #include "sfxge_rx.h"
56 
57 #define RX_REFILL_THRESHOLD (EFX_RXQ_LIMIT(SFXGE_NDESCS) * 9 / 10)
58 #define RX_REFILL_THRESHOLD_2 (RX_REFILL_THRESHOLD / 2)
59 
60 /* Size of the LRO hash table.  Must be a power of 2.  A larger table
61  * means we can accelerate a larger number of streams.
62  */
63 static unsigned lro_table_size = 128;
64 
65 /* Maximum length of a hash chain.  If chains get too long then the lookup
66  * time increases and may exceed the benefit of LRO.
67  */
68 static unsigned lro_chain_max = 20;
69 
70 /* Maximum time (in ticks) that a connection can be idle before its LRO
71  * state is discarded.
72  */
73 static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
74 
75 /* Number of packets with payload that must arrive in-order before a
76  * connection is eligible for LRO.  The idea is we should avoid coalescing
77  * segments when the sender is in slow-start because reducing the ACK rate
78  * can damage performance.
79  */
80 static int lro_slow_start_packets = 2000;
81 
82 /* Number of packets with payload that must arrive in-order following loss
83  * before a connection is eligible for LRO.  The idea is we should avoid
84  * coalescing segments when the sender is recovering from loss, because
85  * reducing the ACK rate can damage performance.
86  */
87 static int lro_loss_packets = 20;
88 
89 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
90 #define SFXGE_LRO_L2_ID_VLAN 0x4000
91 #define SFXGE_LRO_L2_ID_IPV6 0x8000
92 #define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
93 #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
94 
95 /* Compare IPv6 addresses, avoiding conditional branches */
96 static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left,
97 					    const struct in6_addr *right)
98 {
99 #if LONG_BIT == 64
100 	const uint64_t *left64 = (const uint64_t *)left;
101 	const uint64_t *right64 = (const uint64_t *)right;
102 	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
103 #else
104 	return (left->s6_addr32[0] - right->s6_addr32[0]) |
105 	       (left->s6_addr32[1] - right->s6_addr32[1]) |
106 	       (left->s6_addr32[2] - right->s6_addr32[2]) |
107 	       (left->s6_addr32[3] - right->s6_addr32[3]);
108 #endif
109 }
110 
111 void
112 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
113 {
114 
115 	rxq->flush_state = SFXGE_FLUSH_DONE;
116 }
117 
118 void
119 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
120 {
121 
122 	rxq->flush_state = SFXGE_FLUSH_FAILED;
123 }
124 
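/* RSS (Toeplitz) hash key; programmed into the controller by
 * sfxge_rx_start().
 */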
125 static uint8_t toep_key[] = {
126 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
127 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
128 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
129 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
130 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
131 };
132 
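/* Refill callout handler: ask the event queue to top up the receive
 * queue by posting a software (magic) refill event to it.
 */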
133 static void
134 sfxge_rx_post_refill(void *arg)
135 {
136 	struct sfxge_rxq *rxq = arg;
137 	struct sfxge_softc *sc;
138 	unsigned int index;
139 	struct sfxge_evq *evq;
140 	uint16_t magic;
141 
142 	sc = rxq->sc;
143 	index = rxq->index;
144 	evq = sc->evq[index];
145 
146 	magic = SFXGE_MAGIC_RX_QREFILL | index;
147 
148 	/* This is guaranteed due to the start/stop order of rx and ev */
149 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
150 	    ("evq not started"));
151 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
152 	    ("rxq not started"));
153 	efx_ev_qpost(evq->common, magic);
154 }
155 
156 static void
157 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
158 {
159 	/* Initially retry after 100 ms, but back off in case of
160 	 * repeated failures as we probably have to wait for the
161 	 * administrator to raise the pool limit. */
162 	if (retrying)
163 		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
164 	else
165 		rxq->refill_delay = hz / 10;
166 
167 	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
168 			     sfxge_rx_post_refill, rxq);
169 }
170 
171 static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
172 {
173 	struct mb_args args;
174 	struct mbuf *m;
175 
176 	/* Allocate mbuf structure */
177 	args.flags = M_PKTHDR;
178 	args.type = MT_DATA;
179 	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
180 
181 	/* Allocate (and attach) packet buffer */
182 	if (m && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
183 		uma_zfree(zone_mbuf, m);
184 		m = NULL;
185 	}
186 
187 	return m;
188 }
189 
190 #define	SFXGE_REFILL_BATCH  64
191 
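/* Allocate mbufs for up to 'target' new receive descriptors, map them
 * for DMA and post them to the hardware in batches of
 * SFXGE_REFILL_BATCH.  The 'retrying' flag selects the back-off
 * behaviour used if the refill has to be rescheduled.
 */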
192 static void
193 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
194 {
195 	struct sfxge_softc *sc;
196 	unsigned int index;
197 	struct sfxge_evq *evq;
198 	unsigned int batch;
199 	unsigned int rxfill;
200 	unsigned int mblksize;
201 	int ntodo;
202 	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
203 
204 	sc = rxq->sc;
205 	index = rxq->index;
206 	evq = sc->evq[index];
207 
208 	prefetch_read_many(sc->enp);
209 	prefetch_read_many(rxq->common);
210 
211 	mtx_assert(&evq->lock, MA_OWNED);
212 
213 	if (rxq->init_state != SFXGE_RXQ_STARTED)
214 		return;
215 
216 	rxfill = rxq->added - rxq->completed;
217 	KASSERT(rxfill <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
218 	    ("rxfill > EFX_RXQ_LIMIT(SFXGE_NDESCS)"));
219 	ntodo = min(EFX_RXQ_LIMIT(SFXGE_NDESCS) - rxfill, target);
220 	KASSERT(ntodo <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
221 	    ("ntodo > EFX_RXQ_LIMIT(SFXGE_NDESCS)"));
222 
223 	if (ntodo == 0)
224 		return;
225 
226 	batch = 0;
227 	mblksize = sc->rx_buffer_size;
228 	while (ntodo-- > 0) {
229 		unsigned int id;
230 		struct sfxge_rx_sw_desc *rx_desc;
231 		bus_dma_segment_t seg;
232 		struct mbuf *m;
233 
234 		id = (rxq->added + batch) & (SFXGE_NDESCS - 1);
235 		rx_desc = &rxq->queue[id];
236 		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
237 
238 		rx_desc->flags = EFX_DISCARD;
239 		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
240 		if (m == NULL)
241 			break;
242 		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
243 		addr[batch++] = seg.ds_addr;
244 
245 		if (batch == SFXGE_REFILL_BATCH) {
246 			efx_rx_qpost(rxq->common, addr, mblksize, batch,
247 			    rxq->completed, rxq->added);
248 			rxq->added += batch;
249 			batch = 0;
250 		}
251 	}
252 
253 	if (ntodo != 0)
254 		sfxge_rx_schedule_refill(rxq, retrying);
255 
256 	if (batch != 0) {
257 		efx_rx_qpost(rxq->common, addr, mblksize, batch,
258 		    rxq->completed, rxq->added);
259 		rxq->added += batch;
260 	}
261 
262 	/* Make the descriptors visible to the hardware */
263 	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
264 			BUS_DMASYNC_PREWRITE);
265 
266 	efx_rx_qpush(rxq->common, rxq->added);
267 }
268 
269 void
270 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
271 {
272 
273 	if (rxq->init_state != SFXGE_RXQ_STARTED)
274 		return;
275 
276 	/* Make sure the queue is full */
277 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_TRUE);
278 }
279 
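/* Hand a received packet to the network stack via if_input(). */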
280 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
281 {
282 	struct ifnet *ifp = sc->ifnet;
283 
284 	m->m_pkthdr.rcvif = ifp;
285 	m->m_pkthdr.header = m->m_data;
286 	m->m_pkthdr.csum_data = 0xffff;
287 	ifp->if_input(ifp, m);
288 }
289 
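/* Translate the hardware checksum and hash information for a received
 * packet into mbuf metadata, strip the receive prefix and deliver the
 * packet, then reset the software descriptor.
 */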
290 static void
291 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
292 {
293 	struct mbuf *m = rx_desc->mbuf;
294 	int csum_flags;
295 
296 	/* Convert checksum flags */
297 	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
298 		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
299 	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
300 		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
301 
302 #ifdef SFXGE_HAVE_MQ
303 	/* The hash covers a 4-tuple for TCP only */
304 	if (rx_desc->flags & EFX_PKT_TCP) {
305 		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
306 						       mtod(m, uint8_t *));
307 		m->m_flags |= M_FLOWID;
308 	}
309 #endif
310 	m->m_data += sc->rx_prefix_size;
311 	m->m_len = rx_desc->size - sc->rx_prefix_size;
312 	m->m_pkthdr.len = m->m_len;
313 	m->m_pkthdr.csum_flags = csum_flags;
314 	__sfxge_rx_deliver(sc, rx_desc->mbuf);
315 
316 	rx_desc->flags = EFX_DISCARD;
317 	rx_desc->mbuf = NULL;
318 }
319 
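/* Deliver the coalesced packet held for a connection, first restoring
 * the IP length field (and IPv4 header checksum) and copying the TCP
 * window, ACK and options from the most recently merged segment.
 */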
320 static void
321 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
322 {
323 	struct sfxge_softc *sc = st->sc;
324 	struct mbuf *m = c->mbuf;
325 	struct tcphdr *c_th;
326 	int csum_flags;
327 
328 	KASSERT(m, ("no mbuf to deliver"));
329 
330 	++st->n_bursts;
331 
332 	/* Finish off packet munging and recalculate IP header checksum. */
333 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
334 		struct ip *iph = c->nh;
335 		iph->ip_len = htons(iph->ip_len);
336 		iph->ip_sum = 0;
337 		iph->ip_sum = in_cksum_hdr(iph);
338 		c_th = (struct tcphdr *)(iph + 1);
339 		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
340 			      CSUM_IP_CHECKED | CSUM_IP_VALID);
341 	} else {
342 		struct ip6_hdr *iph = c->nh;
343 		iph->ip6_plen = htons(iph->ip6_plen);
344 		c_th = (struct tcphdr *)(iph + 1);
345 		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
346 	}
347 
348 	c_th->th_win = c->th_last->th_win;
349 	c_th->th_ack = c->th_last->th_ack;
350 	if (c_th->th_off == c->th_last->th_off) {
351 		/* Copy TCP options (take care to avoid going negative). */
352 		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
353 		memcpy(c_th + 1, c->th_last + 1, optlen);
354 	}
355 
356 #ifdef SFXGE_HAVE_MQ
357 	m->m_pkthdr.flowid = c->conn_hash;
358 	m->m_flags |= M_FLOWID;
359 #endif
360 	m->m_pkthdr.csum_flags = csum_flags;
361 	__sfxge_rx_deliver(sc, m);
362 
363 	c->mbuf = NULL;
364 	c->delivered = 1;
365 }
366 
367 /* Drop the given connection, and add it to the free list. */
368 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
369 {
370 	unsigned bucket;
371 
372 	KASSERT(!c->mbuf, ("found orphaned mbuf"));
373 
374 	if (c->next_buf.mbuf) {
375 		sfxge_rx_deliver(rxq->sc, &c->next_buf);
376 		LIST_REMOVE(c, active_link);
377 	}
378 
379 	bucket = c->conn_hash & rxq->lro.conns_mask;
380 	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
381 	--rxq->lro.conns_n[bucket];
382 	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
383 	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
384 }
385 
386 /* Stop tracking connections that have gone idle in order to keep hash
387  * chains short.
388  */
389 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
390 {
391 	struct sfxge_lro_conn *c;
392 	unsigned i;
393 
394 	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
395 		("found active connections"));
396 
397 	rxq->lro.last_purge_ticks = now;
398 	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
399 		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
400 			continue;
401 
402 		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
403 		if (now - c->last_pkt_ticks > lro_idle_ticks) {
404 			++rxq->lro.n_drop_idle;
405 			sfxge_lro_drop(rxq, c);
406 		}
407 	}
408 }
409 
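/* Append a newly received segment's payload to the packet being
 * coalesced for a connection, updating the IP length and the TCP PUSH
 * flag, and deliver the packet early if a further segment could
 * overflow the IP length field.
 */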
410 static void
411 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
412 		struct mbuf *mbuf, struct tcphdr *th)
413 {
414 	struct tcphdr *c_th;
415 
416 	/* Tack the new mbuf onto the chain. */
417 	KASSERT(!mbuf->m_next, ("mbuf already chained"));
418 	c->mbuf_tail->m_next = mbuf;
419 	c->mbuf_tail = mbuf;
420 
421 	/* Increase length appropriately */
422 	c->mbuf->m_pkthdr.len += mbuf->m_len;
423 
424 	/* Update the connection state flags */
425 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
426 		struct ip *iph = c->nh;
427 		iph->ip_len += mbuf->m_len;
428 		c_th = (struct tcphdr *)(iph + 1);
429 	} else {
430 		struct ip6_hdr *iph = c->nh;
431 		iph->ip6_plen += mbuf->m_len;
432 		c_th = (struct tcphdr *)(iph + 1);
433 	}
434 	c_th->th_flags |= (th->th_flags & TH_PUSH);
435 	c->th_last = th;
436 	++st->n_merges;
437 
438 	/* Pass packet up now if another segment could overflow the IP
439 	 * length.
440 	 */
441 	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
442 		sfxge_lro_deliver(st, c);
443 }
444 
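/* Begin coalescing a new packet for a connection, converting the IP
 * length field to host byte order so that it can be updated as further
 * segments are merged.
 */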
445 static void
446 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
447 		struct mbuf *mbuf, void *nh, struct tcphdr *th)
448 {
449 	/* Start the chain */
450 	c->mbuf = mbuf;
451 	c->mbuf_tail = c->mbuf;
452 	c->nh = nh;
453 	c->th_last = th;
454 
455 	mbuf->m_pkthdr.len = mbuf->m_len;
456 
457 	/* Mangle header fields for later processing */
458 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
459 		struct ip *iph = nh;
460 		iph->ip_len = ntohs(iph->ip_len);
461 	} else {
462 		struct ip6_hdr *iph = nh;
463 		iph->ip6_plen = ntohs(iph->ip6_plen);
464 	}
465 }
466 
467 /* Try to merge or otherwise hold or deliver (as appropriate) the
468  * packet buffered for this connection (c->next_buf).  Return a flag
469  * indicating whether the connection is still active for LRO purposes.
470  */
471 static int
472 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
473 {
474 	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
475 	char *eh = c->next_eh;
476 	int data_length, hdr_length, dont_merge;
477 	unsigned th_seq, pkt_length;
478 	struct tcphdr *th;
479 	unsigned now;
480 
481 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
482 		struct ip *iph = c->next_nh;
483 		th = (struct tcphdr *)(iph + 1);
484 		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
485 	} else {
486 		struct ip6_hdr *iph = c->next_nh;
487 		th = (struct tcphdr *)(iph + 1);
488 		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
489 	}
490 
491 	hdr_length = (char *) th + th->th_off * 4 - eh;
492 	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
493 		       hdr_length);
494 	th_seq = ntohl(th->th_seq);
495 	dont_merge = ((data_length <= 0)
496 		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
497 
498 	/* Check for options other than aligned timestamp. */
499 	if (th->th_off != 5) {
500 		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
501 		if (th->th_off == 8 &&
502 		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
503 					(TCPOPT_NOP << 16) |
504 					(TCPOPT_TIMESTAMP << 8) |
505 					TCPOLEN_TIMESTAMP)) {
506 			/* timestamp option -- okay */
507 		} else {
508 			dont_merge = 1;
509 		}
510 	}
511 
512 	if (__predict_false(th_seq != c->next_seq)) {
513 		/* Out-of-order, so start counting again. */
514 		if (c->mbuf)
515 			sfxge_lro_deliver(&rxq->lro, c);
516 		c->n_in_order_pkts -= lro_loss_packets;
517 		c->next_seq = th_seq + data_length;
518 		++rxq->lro.n_misorder;
519 		goto deliver_buf_out;
520 	}
521 	c->next_seq = th_seq + data_length;
522 
523 	now = ticks;
524 	if (now - c->last_pkt_ticks > lro_idle_ticks) {
525 		++rxq->lro.n_drop_idle;
526 		if (c->mbuf)
527 			sfxge_lro_deliver(&rxq->lro, c);
528 		sfxge_lro_drop(rxq, c);
529 		return 0;
530 	}
531 	c->last_pkt_ticks = ticks;
532 
533 	if (c->n_in_order_pkts < lro_slow_start_packets) {
534 		/* May be in slow-start, so don't merge. */
535 		++rxq->lro.n_slow_start;
536 		++c->n_in_order_pkts;
537 		goto deliver_buf_out;
538 	}
539 
540 	if (__predict_false(dont_merge)) {
541 		if (c->mbuf)
542 			sfxge_lro_deliver(&rxq->lro, c);
543 		if (th->th_flags & (TH_FIN | TH_RST)) {
544 			++rxq->lro.n_drop_closed;
545 			sfxge_lro_drop(rxq, c);
546 			return 0;
547 		}
548 		goto deliver_buf_out;
549 	}
550 
551 	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
552 
553 	if (__predict_true(c->mbuf != NULL)) {
554 		/* Remove headers and any padding */
555 		rx_buf->mbuf->m_data += hdr_length;
556 		rx_buf->mbuf->m_len = data_length;
557 
558 		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
559 	} else {
560 		/* Remove any padding */
561 		rx_buf->mbuf->m_len = pkt_length;
562 
563 		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
564 	}
565 
566 	rx_buf->mbuf = NULL;
567 	return 1;
568 
569  deliver_buf_out:
570 	sfxge_rx_deliver(rxq->sc, rx_buf);
571 	return 1;
572 }
573 
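/* Start tracking a new connection for LRO, reusing an entry from the
 * free list if possible and respecting the maximum hash chain length.
 */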
574 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
575 			       uint16_t l2_id, void *nh, struct tcphdr *th)
576 {
577 	unsigned bucket = conn_hash & st->conns_mask;
578 	struct sfxge_lro_conn *c;
579 
580 	if (st->conns_n[bucket] >= lro_chain_max) {
581 		++st->n_too_many;
582 		return;
583 	}
584 
585 	if (!TAILQ_EMPTY(&st->free_conns)) {
586 		c = TAILQ_FIRST(&st->free_conns);
587 		TAILQ_REMOVE(&st->free_conns, c, link);
588 	} else {
589 		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
590 		if (c == NULL)
591 			return;
592 		c->mbuf = NULL;
593 		c->next_buf.mbuf = NULL;
594 	}
595 
596 	/* Create the connection tracking data */
597 	++st->conns_n[bucket];
598 	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
599 	c->l2_id = l2_id;
600 	c->conn_hash = conn_hash;
601 	c->source = th->th_sport;
602 	c->dest = th->th_dport;
603 	c->n_in_order_pkts = 0;
604 	c->last_pkt_ticks = *(volatile int *)&ticks;
605 	c->delivered = 0;
606 	++st->n_new_stream;
607 	/* NB. We don't initialise c->next_seq, and it doesn't matter what
608 	 * value it has.  Most likely the next packet received for this
609 	 * connection will not match -- no harm done.
610 	 */
611 }
612 
613 /* Process mbuf and decide whether to dispatch it to the stack now or
614  * later.
615  */
616 static void
617 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
618 {
619 	struct sfxge_softc *sc = rxq->sc;
620 	struct mbuf *m = rx_buf->mbuf;
621 	struct ether_header *eh;
622 	struct sfxge_lro_conn *c;
623 	uint16_t l2_id;
624 	uint16_t l3_proto;
625 	void *nh;
626 	struct tcphdr *th;
627 	uint32_t conn_hash;
628 	unsigned bucket;
629 
630 	/* Get the hardware hash */
631 	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
632 				      mtod(m, uint8_t *));
633 
634 	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
635 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
636 		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
637 		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
638 			SFXGE_LRO_L2_ID_VLAN;
639 		l3_proto = veh->evl_proto;
640 		nh = veh + 1;
641 	} else {
642 		l2_id = 0;
643 		l3_proto = eh->ether_type;
644 		nh = eh + 1;
645 	}
646 
647 	/* Check whether this is a suitable packet (unfragmented
648 	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header;
649 	 * if not, just deliver the packet to the stack now.
650 	 */
651 	if (l3_proto == htons(ETHERTYPE_IP)) {
652 		struct ip *iph = nh;
653 		if ((iph->ip_p - IPPROTO_TCP) |
654 		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
655 		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
656 			goto deliver_now;
657 		th = (struct tcphdr *)(iph + 1);
658 	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
659 		struct ip6_hdr *iph = nh;
660 		if (iph->ip6_nxt != IPPROTO_TCP)
661 			goto deliver_now;
662 		l2_id |= SFXGE_LRO_L2_ID_IPV6;
663 		th = (struct tcphdr *)(iph + 1);
664 	} else {
665 		goto deliver_now;
666 	}
667 
668 	bucket = conn_hash & rxq->lro.conns_mask;
669 
670 	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
671 		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
672 			continue;
673 		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
674 			continue;
675 		if (c->mbuf) {
676 			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
677 				struct ip *c_iph, *iph = nh;
678 				c_iph = c->nh;
679 				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
680 				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
681 					continue;
682 			} else {
683 				struct ip6_hdr *c_iph, *iph = nh;
684 				c_iph = c->nh;
685 				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
686 				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
687 					continue;
688 			}
689 		}
690 
691 		/* Re-insert at head of list to reduce lookup time. */
692 		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
693 		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
694 
695 		if (c->next_buf.mbuf) {
696 			if (!sfxge_lro_try_merge(rxq, c))
697 				goto deliver_now;
698 		} else {
699 			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
700 			    active_link);
701 		}
702 		c->next_buf = *rx_buf;
703 		c->next_eh = eh;
704 		c->next_nh = nh;
705 
706 		rx_buf->mbuf = NULL;
707 		rx_buf->flags = EFX_DISCARD;
708 		return;
709 	}
710 
711 	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
712  deliver_now:
713 	sfxge_rx_deliver(sc, rx_buf);
714 }
715 
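/* At the end of a burst of receive events, merge or deliver whatever
 * is still held for the active connections and periodically purge
 * idle connection state.
 */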
716 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
717 {
718 	struct sfxge_lro_state *st = &rxq->lro;
719 	struct sfxge_lro_conn *c;
720 	unsigned t;
721 
722 	while (!LIST_EMPTY(&st->active_conns)) {
723 		c = LIST_FIRST(&st->active_conns);
724 		if (!c->delivered && c->mbuf)
725 			sfxge_lro_deliver(st, c);
726 		if (sfxge_lro_try_merge(rxq, c)) {
727 			if (c->mbuf)
728 				sfxge_lro_deliver(st, c);
729 			LIST_REMOVE(c, active_link);
730 		}
731 		c->delivered = 0;
732 	}
733 
734 	t = *(volatile int *)&ticks;
735 	if (__predict_false(t != st->last_purge_ticks))
736 		sfxge_lro_purge_idle(rxq, t);
737 }
738 
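/* Process descriptors between the completed and pending indices:
 * discard errored and loopback packets and pass the rest to LRO or
 * directly up the stack, then top up the queue if it has drained
 * below the refill threshold.
 */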
739 void
740 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
741 {
742 	struct sfxge_softc *sc = rxq->sc;
743 	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
744 	unsigned int index;
745 	struct sfxge_evq *evq;
746 	unsigned int completed;
747 	unsigned int level;
748 	struct mbuf *m;
749 	struct sfxge_rx_sw_desc *prev = NULL;
750 
751 	index = rxq->index;
752 	evq = sc->evq[index];
753 
754 	mtx_assert(&evq->lock, MA_OWNED);
755 
756 	completed = rxq->completed;
757 	while (completed != rxq->pending) {
758 		unsigned int id;
759 		struct sfxge_rx_sw_desc *rx_desc;
760 
761 		id = completed++ & (SFXGE_NDESCS - 1);
762 		rx_desc = &rxq->queue[id];
763 		m = rx_desc->mbuf;
764 
765 		if (rxq->init_state != SFXGE_RXQ_STARTED)
766 			goto discard;
767 
768 		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
769 			goto discard;
770 
771 		prefetch_read_many(mtod(m, caddr_t));
772 
773 		/* Check for loopback packets */
774 		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
775 		    !(rx_desc->flags & EFX_PKT_IPV6)) {
776 			struct ether_header *etherhp;
777 
778 			/*LINTED*/
779 			etherhp = mtod(m, struct ether_header *);
780 
781 			if (etherhp->ether_type ==
782 			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
783 				EFSYS_PROBE(loopback);
784 
785 				rxq->loopback++;
786 				goto discard;
787 			}
788 		}
789 
790 		/* Pass packet up the stack or into LRO (pipelined) */
791 		if (prev != NULL) {
792 			if (lro_enabled)
793 				sfxge_lro(rxq, prev);
794 			else
795 				sfxge_rx_deliver(sc, prev);
796 		}
797 		prev = rx_desc;
798 		continue;
799 
800 discard:
801 		/* Return the packet to the pool */
802 		m_free(m);
803 		rx_desc->mbuf = NULL;
804 	}
805 	rxq->completed = completed;
806 
807 	level = rxq->added - rxq->completed;
808 
809 	/* Pass last packet up the stack or into LRO */
810 	if (prev != NULL) {
811 		if (lro_enabled)
812 			sfxge_lro(rxq, prev);
813 		else
814 			sfxge_rx_deliver(sc, prev);
815 	}
816 
817 	/*
818 	 * If there are any pending flows and this is the end of the
819 	 * poll then they must be completed.
820 	 */
821 	if (eop)
822 		sfxge_lro_end_of_burst(rxq);
823 
824 	/* Top up the queue if necessary */
825 	if (level < RX_REFILL_THRESHOLD)
826 		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
827 }
828 
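/* Stop a receive queue: flush it (retrying if the flush fails),
 * complete any outstanding descriptors and tear down the common code
 * queue and its buffer table entries.
 */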
829 static void
830 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
831 {
832 	struct sfxge_rxq *rxq;
833 	struct sfxge_evq *evq;
834 	unsigned int count;
835 
836 	rxq = sc->rxq[index];
837 	evq = sc->evq[index];
838 
839 	mtx_lock(&evq->lock);
840 
841 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
842 	    ("rxq not started"));
843 
844 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
845 
846 	callout_stop(&rxq->refill_callout);
847 
848 again:
849 	rxq->flush_state = SFXGE_FLUSH_PENDING;
850 
851 	/* Flush the receive queue */
852 	efx_rx_qflush(rxq->common);
853 
854 	mtx_unlock(&evq->lock);
855 
856 	count = 0;
857 	do {
858 		/* Spin for 100 ms */
859 		DELAY(100000);
860 
861 		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
862 			break;
863 
864 	} while (++count < 20);
865 
866 	mtx_lock(&evq->lock);
867 
868 	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
869 		goto again;
870 
871 	rxq->flush_state = SFXGE_FLUSH_DONE;
872 
873 	rxq->pending = rxq->added;
874 	sfxge_rx_qcomplete(rxq, B_TRUE);
875 
876 	KASSERT(rxq->completed == rxq->pending,
877 	    ("rxq->completed != rxq->pending"));
878 
879 	rxq->added = 0;
880 	rxq->pending = 0;
881 	rxq->completed = 0;
882 	rxq->loopback = 0;
883 
884 	/* Destroy the common code receive queue. */
885 	efx_rx_qdestroy(rxq->common);
886 
887 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
888 	    EFX_RXQ_NBUFS(SFXGE_NDESCS));
889 
890 	mtx_unlock(&evq->lock);
891 }
892 
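/* Start a receive queue: program the buffer table, create and enable
 * the common code queue and fill it with receive buffers.
 */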
893 static int
894 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
895 {
896 	struct sfxge_rxq *rxq;
897 	efsys_mem_t *esmp;
898 	struct sfxge_evq *evq;
899 	int rc;
900 
901 	rxq = sc->rxq[index];
902 	esmp = &rxq->mem;
903 	evq = sc->evq[index];
904 
905 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
906 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
907 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
908 	    ("evq->init_state != SFXGE_EVQ_STARTED"));
909 
910 	/* Program the buffer table. */
911 	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
912 	    EFX_RXQ_NBUFS(SFXGE_NDESCS))) != 0)
913 		return rc;
914 
915 	/* Create the common code receive queue. */
916 	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
917 	    esmp, SFXGE_NDESCS, rxq->buf_base_id, evq->common,
918 	    &rxq->common)) != 0)
919 		goto fail;
920 
921 	mtx_lock(&evq->lock);
922 
923 	/* Enable the receive queue. */
924 	efx_rx_qenable(rxq->common);
925 
926 	rxq->init_state = SFXGE_RXQ_STARTED;
927 
928 	/* Try to fill the queue from the pool. */
929 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
930 
931 	mtx_unlock(&evq->lock);
932 
933 	return (0);
934 
935 fail:
936 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
937 	    EFX_RXQ_NBUFS(SFXGE_NDESCS));
938 	return rc;
939 }
940 
941 void
942 sfxge_rx_stop(struct sfxge_softc *sc)
943 {
944 	struct sfxge_intr *intr;
945 	int index;
946 
947 	intr = &sc->intr;
948 
949 	/* Stop the receive queue(s) */
950 	index = intr->n_alloc;
951 	while (--index >= 0)
952 		sfxge_rx_qstop(sc, index);
953 
954 	sc->rx_prefix_size = 0;
955 	sc->rx_buffer_size = 0;
956 
957 	efx_rx_fini(sc->enp);
958 }
959 
960 int
961 sfxge_rx_start(struct sfxge_softc *sc)
962 {
963 	struct sfxge_intr *intr;
964 	int index;
965 	int rc;
966 
967 	intr = &sc->intr;
968 
969 	/* Initialize the common code receive module. */
970 	if ((rc = efx_rx_init(sc->enp)) != 0)
971 		return (rc);
972 
973 	/* Calculate the receive packet buffer size. */
974 	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
975 	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
976 			      sc->rx_prefix_size);
977 
978 	/* Select zone for packet buffers */
979 	if (sc->rx_buffer_size <= MCLBYTES)
980 		sc->rx_buffer_zone = zone_clust;
981 	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
982 		sc->rx_buffer_zone = zone_jumbop;
983 	else if (sc->rx_buffer_size <= MJUM9BYTES)
984 		sc->rx_buffer_zone = zone_jumbo9;
985 	else
986 		sc->rx_buffer_zone = zone_jumbo16;
987 
988 	/*
989 	 * Set up the scale table.  Enable all hash types and hash insertion.
990 	 */
991 	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
992 		sc->rx_indir_table[index] = index % sc->intr.n_alloc;
993 	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
994 				       SFXGE_RX_SCALE_MAX)) != 0)
995 		goto fail;
996 	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
997 	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
998 	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
999 
1000 	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
1001 	    sizeof(toep_key))) != 0)
1002 		goto fail;
1003 
1004 	/* Start the receive queue(s). */
1005 	for (index = 0; index < intr->n_alloc; index++) {
1006 		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1007 			goto fail2;
1008 	}
1009 
1010 	return (0);
1011 
1012 fail2:
1013 	while (--index >= 0)
1014 		sfxge_rx_qstop(sc, index);
1015 
1016 fail:
1017 	efx_rx_fini(sc->enp);
1018 
1019 	return (rc);
1020 }
1021 
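/* Allocate and initialise the LRO hash table and free list for a
 * receive queue.
 */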
1022 static void sfxge_lro_init(struct sfxge_rxq *rxq)
1023 {
1024 	struct sfxge_lro_state *st = &rxq->lro;
1025 	unsigned i;
1026 
1027 	st->conns_mask = lro_table_size - 1;
1028 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1029 		("lro_table_size must be a power of 2"));
1030 	st->sc = rxq->sc;
1031 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1032 			   M_SFXGE, M_WAITOK);
1033 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1034 			     M_SFXGE, M_WAITOK);
1035 	for (i = 0; i <= st->conns_mask; ++i) {
1036 		TAILQ_INIT(&st->conns[i]);
1037 		st->conns_n[i] = 0;
1038 	}
1039 	LIST_INIT(&st->active_conns);
1040 	TAILQ_INIT(&st->free_conns);
1041 }
1042 
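/* Drop all tracked connections and release the LRO state for a
 * receive queue.
 */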
1043 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1044 {
1045 	struct sfxge_lro_state *st = &rxq->lro;
1046 	struct sfxge_lro_conn *c;
1047 	unsigned i;
1048 
1049 	/* Return cleanly if sfxge_lro_init() has not been called. */
1050 	if (st->conns == NULL)
1051 		return;
1052 
1053 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1054 
1055 	for (i = 0; i <= st->conns_mask; ++i) {
1056 		while (!TAILQ_EMPTY(&st->conns[i])) {
1057 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1058 			sfxge_lro_drop(rxq, c);
1059 		}
1060 	}
1061 
1062 	while (!TAILQ_EMPTY(&st->free_conns)) {
1063 		c = TAILQ_FIRST(&st->free_conns);
1064 		TAILQ_REMOVE(&st->free_conns, c, link);
1065 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1066 		free(c, M_SFXGE);
1067 	}
1068 
1069 	free(st->conns_n, M_SFXGE);
1070 	free(st->conns, M_SFXGE);
1071 	st->conns = NULL;
1072 }
1073 
1074 static void
1075 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1076 {
1077 	struct sfxge_rxq *rxq;
1078 
1079 	rxq = sc->rxq[index];
1080 
1081 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1082 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1083 
1084 	/* Free the context array and the flow table. */
1085 	free(rxq->queue, M_SFXGE);
1086 	sfxge_lro_fini(rxq);
1087 
1088 	/* Release DMA memory. */
1089 	sfxge_dma_free(&rxq->mem);
1090 
1091 	sc->rxq[index] = NULL;
1092 
1093 	free(rxq, M_SFXGE);
1094 }
1095 
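/* Allocate and initialise the software state for a receive queue:
 * descriptor DMA memory, buffer table entries, the software descriptor
 * ring, LRO state and the refill callout.
 */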
1096 static int
1097 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1098 {
1099 	struct sfxge_rxq *rxq;
1100 	struct sfxge_evq *evq;
1101 	efsys_mem_t *esmp;
1102 	int rc;
1103 
1104 	KASSERT(index < sc->intr.n_alloc, ("index >= %d", sc->intr.n_alloc));
1105 
1106 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1107 	rxq->sc = sc;
1108 	rxq->index = index;
1109 
1110 	sc->rxq[index] = rxq;
1111 	esmp = &rxq->mem;
1112 
1113 	evq = sc->evq[index];
1114 
1115 	/* Allocate and zero DMA space. */
1116 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(SFXGE_NDESCS), esmp)) != 0)
1117 		return (rc);
1118 	(void)memset(esmp->esm_base, 0, EFX_RXQ_SIZE(SFXGE_NDESCS));
1119 
1120 	/* Allocate buffer table entries. */
1121 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(SFXGE_NDESCS),
1122 				 &rxq->buf_base_id);
1123 
1124 	/* Allocate the context array and the flow table. */
1125 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * SFXGE_NDESCS,
1126 	    M_SFXGE, M_WAITOK | M_ZERO);
1127 	sfxge_lro_init(rxq);
1128 
1129 	callout_init(&rxq->refill_callout, B_TRUE);
1130 
1131 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1132 
1133 	return (0);
1134 }
1135 
1136 static const struct {
1137 	const char *name;
1138 	size_t offset;
1139 } sfxge_rx_stats[] = {
1140 #define SFXGE_RX_STAT(name, member) \
1141 	{ #name, offsetof(struct sfxge_rxq, member) }
1142 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1143 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1144 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1145 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1146 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1147 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1148 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1149 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
1150 };
1151 
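/* Sysctl handler reporting an LRO statistic summed over all receive
 * queues.
 */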
1152 static int
1153 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1154 {
1155 	struct sfxge_softc *sc = arg1;
1156 	unsigned int id = arg2;
1157 	unsigned int sum, index;
1158 
1159 	/* Sum across all RX queues */
1160 	sum = 0;
1161 	for (index = 0; index < sc->intr.n_alloc; index++)
1162 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1163 					 sfxge_rx_stats[id].offset);
1164 
1165 	return SYSCTL_OUT(req, &sum, sizeof(sum));
1166 }
1167 
1168 static void
1169 sfxge_rx_stat_init(struct sfxge_softc *sc)
1170 {
1171 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1172 	struct sysctl_oid_list *stat_list;
1173 	unsigned int id;
1174 
1175 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1176 
1177 	for (id = 0;
1178 	     id < sizeof(sfxge_rx_stats) / sizeof(sfxge_rx_stats[0]);
1179 	     id++) {
1180 		SYSCTL_ADD_PROC(
1181 			ctx, stat_list,
1182 			OID_AUTO, sfxge_rx_stats[id].name,
1183 			CTLTYPE_UINT|CTLFLAG_RD,
1184 			sc, id, sfxge_rx_stat_handler, "IU",
1185 			"");
1186 	}
1187 }
1188 
1189 void
1190 sfxge_rx_fini(struct sfxge_softc *sc)
1191 {
1192 	struct sfxge_intr *intr;
1193 	int index;
1194 
1195 	intr = &sc->intr;
1196 
1197 	index = intr->n_alloc;
1198 	while (--index >= 0)
1199 		sfxge_rx_qfini(sc, index);
1200 }
1201 
1202 int
1203 sfxge_rx_init(struct sfxge_softc *sc)
1204 {
1205 	struct sfxge_intr *intr;
1206 	int index;
1207 	int rc;
1208 
1209 	if (lro_idle_ticks == 0)
1210 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
1211 
1212 	intr = &sc->intr;
1213 
1214 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1215 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1216 
1217 	/* Initialize the receive queue(s) - one per interrupt. */
1218 	for (index = 0; index < intr->n_alloc; index++) {
1219 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1220 			goto fail;
1221 	}
1222 
1223 	sfxge_rx_stat_init(sc);
1224 
1225 	return (0);
1226 
1227 fail:
1228 	/* Tear down the receive queue(s). */
1229 	while (--index >= 0)
1230 		sfxge_rx_qfini(sc, index);
1231 
1232 	return (rc);
1233 }
1234