xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision 7431dfd4580e850375fe5478d92ec770344db098)
1 /*-
2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/smp.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/limits.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_vlan_var.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp.h>
48 
49 #include <machine/in_cksum.h>
50 
51 #include "common/efx.h"
52 
53 
54 #include "sfxge.h"
55 #include "sfxge_rx.h"
56 
57 #define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)
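/*
 * The refill threshold above is 90% of the queue's descriptor limit, so
 * sfxge_rx_qcomplete() tops the ring up well before it can run dry.  As an
 * illustration (hypothetical value), if EFX_RXQ_LIMIT(_entries) evaluated
 * to 1000, a refill would be triggered once fewer than 900 descriptors
 * remained outstanding.
 */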
58 
59 /* Size of the LRO hash table.  Must be a power of 2.  A larger table
60  * means we can accelerate a larger number of streams.
61  */
62 static unsigned lro_table_size = 128;
63 
64 /* Maximum length of a hash chain.  If chains get too long then the lookup
65  * time increases and may exceed the benefit of LRO.
66  */
67 static unsigned lro_chain_max = 20;
68 
69 /* Maximum time (in ticks) that a connection can be idle before its LRO
70  * state is discarded.
71  */
72 static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
73 
74 /* Number of packets with payload that must arrive in-order before a
75  * connection is eligible for LRO.  The idea is that we should avoid
76  * coalescing segments when the sender is in slow-start, because reducing
77  * the ACK rate can damage performance.
78  */
79 static int lro_slow_start_packets = 2000;
80 
81 /* Number of packets with payload that must arrive in-order following loss
82  * before a connection is eligible for LRO.  The idea is that we should
83  * avoid coalescing segments when the sender is recovering from loss,
84  * because reducing the ACK rate can damage performance.
85  */
86 static int lro_loss_packets = 20;
87 
88 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
89 #define	SFXGE_LRO_L2_ID_VLAN 0x4000
90 #define	SFXGE_LRO_L2_ID_IPV6 0x8000
91 #define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
92 #define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
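/*
 * Illustrative example of the l2_id encoding: a TCP/IPv6 packet received
 * on VLAN 5 is tracked with
 *	l2_id = 5 | SFXGE_LRO_L2_ID_VLAN | SFXGE_LRO_L2_ID_IPV6
 * while an untagged TCP/IPv4 packet simply has l2_id = 0.
 */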
93 
94 /* Compare IPv6 addresses, avoiding conditional branches */
95 static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left,
96 					    const struct in6_addr *right)
97 {
98 #if LONG_BIT == 64
99 	const uint64_t *left64 = (const uint64_t *)left;
100 	const uint64_t *right64 = (const uint64_t *)right;
101 	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
102 #else
103 	return (left->s6_addr32[0] - right->s6_addr32[0]) |
104 	       (left->s6_addr32[1] - right->s6_addr32[1]) |
105 	       (left->s6_addr32[2] - right->s6_addr32[2]) |
106 	       (left->s6_addr32[3] - right->s6_addr32[3]);
107 #endif
108 }
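/*
 * Note that ipv6_addr_cmp() is an equality test only: it returns zero when
 * the addresses are identical and some non-zero value otherwise, so the
 * result is not usable for ordering.  Callers OR it with other difference
 * terms, mirroring the branch-free subtraction idiom used for IPv4 fields.
 */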
109 
110 void
111 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
112 {
113 
114 	rxq->flush_state = SFXGE_FLUSH_DONE;
115 }
116 
117 void
118 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
119 {
120 
121 	rxq->flush_state = SFXGE_FLUSH_FAILED;
122 }
123 
124 static uint8_t toep_key[] = {
125 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
126 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
127 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
128 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
129 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
130 };
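/*
 * This 40-byte key appears to be the well-known example key from the
 * Microsoft RSS specification, so the Toeplitz hash computed by the
 * hardware matches the values commonly used for RSS verification.
 */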
131 
132 static void
133 sfxge_rx_post_refill(void *arg)
134 {
135 	struct sfxge_rxq *rxq = arg;
136 	struct sfxge_softc *sc;
137 	unsigned int index;
138 	struct sfxge_evq *evq;
139 	uint16_t magic;
140 
141 	sc = rxq->sc;
142 	index = rxq->index;
143 	evq = sc->evq[index];
144 
145 	magic = SFXGE_MAGIC_RX_QREFILL | index;
146 
147 	/* This is guaranteed due to the start/stop order of rx and ev */
148 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
149 	    ("evq not started"));
150 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
151 	    ("rxq not started"));
152 	efx_ev_qpost(evq->common, magic);
153 }
154 
155 static void
156 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
157 {
158 	/* Initially retry after 100 ms, but back off in case of
159 	 * repeated failures as we probably have to wait for the
160 	 * administrator to raise the pool limit. */
161 	if (retrying)
162 		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
163 	else
164 		rxq->refill_delay = hz / 10;
165 
166 	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
167 			     sfxge_rx_post_refill, rxq);
168 }
169 
170 static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
171 {
172 	struct mb_args args;
173 	struct mbuf *m;
174 
175 	/* Allocate mbuf structure */
176 	args.flags = M_PKTHDR;
177 	args.type = MT_DATA;
178 	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
179 
180 	/* Allocate (and attach) packet buffer */
181 	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
182 		uma_zfree(zone_mbuf, m);
183 		m = NULL;
184 	}
185 
186 	return (m);
187 }
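/*
 * Receive buffer allocation is a two-step process: an mbuf header is taken
 * from the generic mbuf zone and a packet buffer from sc->rx_buffer_zone is
 * then attached to it.  The zone itself (cluster, page-sized jumbo, 9K or
 * 16K) is selected in sfxge_rx_start() from the MTU-derived buffer size.
 */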
188 
189 #define	SFXGE_REFILL_BATCH  64
190 
191 static void
192 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
193 {
194 	struct sfxge_softc *sc;
195 	unsigned int index;
196 	struct sfxge_evq *evq;
197 	unsigned int batch;
198 	unsigned int rxfill;
199 	unsigned int mblksize;
200 	int ntodo;
201 	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
202 
203 	sc = rxq->sc;
204 	index = rxq->index;
205 	evq = sc->evq[index];
206 
207 	prefetch_read_many(sc->enp);
208 	prefetch_read_many(rxq->common);
209 
210 	mtx_assert(&evq->lock, MA_OWNED);
211 
212 	if (rxq->init_state != SFXGE_RXQ_STARTED)
213 		return;
214 
215 	rxfill = rxq->added - rxq->completed;
216 	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
217 	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
218 	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
219 	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
220 	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));
221 
222 	if (ntodo == 0)
223 		return;
224 
225 	batch = 0;
226 	mblksize = sc->rx_buffer_size;
227 	while (ntodo-- > 0) {
228 		unsigned int id;
229 		struct sfxge_rx_sw_desc *rx_desc;
230 		bus_dma_segment_t seg;
231 		struct mbuf *m;
232 
233 		id = (rxq->added + batch) & rxq->ptr_mask;
234 		rx_desc = &rxq->queue[id];
235 		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
236 
237 		rx_desc->flags = EFX_DISCARD;
238 		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
239 		if (m == NULL)
240 			break;
241 		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
242 		addr[batch++] = seg.ds_addr;
243 
244 		if (batch == SFXGE_REFILL_BATCH) {
245 			efx_rx_qpost(rxq->common, addr, mblksize, batch,
246 			    rxq->completed, rxq->added);
247 			rxq->added += batch;
248 			batch = 0;
249 		}
250 	}
251 
252 	if (ntodo != 0)
253 		sfxge_rx_schedule_refill(rxq, retrying);
254 
255 	if (batch != 0) {
256 		efx_rx_qpost(rxq->common, addr, mblksize, batch,
257 		    rxq->completed, rxq->added);
258 		rxq->added += batch;
259 	}
260 
261 	/* Make the descriptors visible to the hardware */
262 	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
263 			BUS_DMASYNC_PREWRITE);
264 
265 	efx_rx_qpush(rxq->common, rxq->added);
266 }
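/*
 * Descriptor addresses are accumulated and handed to the common code in
 * batches of SFXGE_REFILL_BATCH via efx_rx_qpost(), with a single
 * efx_rx_qpush() doorbell write once the whole fill attempt is complete,
 * keeping the number of register writes per refill small.
 */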
267 
268 void
269 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
270 {
271 
272 	if (rxq->init_state != SFXGE_RXQ_STARTED)
273 		return;
274 
275 	/* Make sure the queue is full */
276 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
277 }
278 
279 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
280 {
281 	struct ifnet *ifp = sc->ifnet;
282 
283 	m->m_pkthdr.rcvif = ifp;
284 	m->m_pkthdr.csum_data = 0xffff;
285 	ifp->if_input(ifp, m);
286 }
287 
288 static void
289 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
290 {
291 	struct mbuf *m = rx_desc->mbuf;
292 	int csum_flags;
293 
294 	/* Convert checksum flags */
295 	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
296 		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
297 	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
298 		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
299 
300 #ifdef SFXGE_HAVE_MQ
301 	/* The hash covers a 4-tuple for TCP only */
302 	if (rx_desc->flags & EFX_PKT_TCP) {
303 		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
304 						       mtod(m, uint8_t *));
305 		m->m_flags |= M_FLOWID;
306 	}
307 #endif
308 	m->m_data += sc->rx_prefix_size;
309 	m->m_len = rx_desc->size - sc->rx_prefix_size;
310 	m->m_pkthdr.len = m->m_len;
311 	m->m_pkthdr.csum_flags = csum_flags;
312 	__sfxge_rx_deliver(sc, rx_desc->mbuf);
313 
314 	rx_desc->flags = EFX_DISCARD;
315 	rx_desc->mbuf = NULL;
316 }
317 
318 static void
319 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
320 {
321 	struct sfxge_softc *sc = st->sc;
322 	struct mbuf *m = c->mbuf;
323 	struct tcphdr *c_th;
324 	int csum_flags;
325 
326 	KASSERT(m, ("no mbuf to deliver"));
327 
328 	++st->n_bursts;
329 
330 	/* Finish off packet munging and recalculate IP header checksum. */
331 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
332 		struct ip *iph = c->nh;
333 		iph->ip_len = htons(iph->ip_len);
334 		iph->ip_sum = 0;
335 		iph->ip_sum = in_cksum_hdr(iph);
336 		c_th = (struct tcphdr *)(iph + 1);
337 		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
338 			      CSUM_IP_CHECKED | CSUM_IP_VALID);
339 	} else {
340 		struct ip6_hdr *iph = c->nh;
341 		iph->ip6_plen = htons(iph->ip6_plen);
342 		c_th = (struct tcphdr *)(iph + 1);
343 		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
344 	}
345 
346 	c_th->th_win = c->th_last->th_win;
347 	c_th->th_ack = c->th_last->th_ack;
348 	if (c_th->th_off == c->th_last->th_off) {
349 		/* Copy TCP options (take care to avoid going negative). */
350 		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
351 		memcpy(c_th + 1, c->th_last + 1, optlen);
352 	}
353 
354 #ifdef SFXGE_HAVE_MQ
355 	m->m_pkthdr.flowid = c->conn_hash;
356 	m->m_flags |= M_FLOWID;
357 #endif
358 	m->m_pkthdr.csum_flags = csum_flags;
359 	__sfxge_rx_deliver(sc, m);
360 
361 	c->mbuf = NULL;
362 	c->delivered = 1;
363 }
364 
365 /* Drop the given connection, and add it to the free list. */
366 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
367 {
368 	unsigned bucket;
369 
370 	KASSERT(!c->mbuf, ("found orphaned mbuf"));
371 
372 	if (c->next_buf.mbuf != NULL) {
373 		sfxge_rx_deliver(rxq->sc, &c->next_buf);
374 		LIST_REMOVE(c, active_link);
375 	}
376 
377 	bucket = c->conn_hash & rxq->lro.conns_mask;
378 	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
379 	--rxq->lro.conns_n[bucket];
380 	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
381 	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
382 }
383 
384 /* Stop tracking connections that have gone idle in order to keep hash
385  * chains short.
386  */
387 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
388 {
389 	struct sfxge_lro_conn *c;
390 	unsigned i;
391 
392 	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
393 		("found active connections"));
394 
395 	rxq->lro.last_purge_ticks = now;
396 	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
397 		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
398 			continue;
399 
400 		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
401 		if (now - c->last_pkt_ticks > lro_idle_ticks) {
402 			++rxq->lro.n_drop_idle;
403 			sfxge_lro_drop(rxq, c);
404 		}
405 	}
406 }
407 
408 static void
409 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
410 		struct mbuf *mbuf, struct tcphdr *th)
411 {
412 	struct tcphdr *c_th;
413 
414 	/* Tack the new mbuf onto the chain. */
415 	KASSERT(!mbuf->m_next, ("mbuf already chained"));
416 	c->mbuf_tail->m_next = mbuf;
417 	c->mbuf_tail = mbuf;
418 
419 	/* Increase length appropriately */
420 	c->mbuf->m_pkthdr.len += mbuf->m_len;
421 
422 	/* Update the connection state flags */
423 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
424 		struct ip *iph = c->nh;
425 		iph->ip_len += mbuf->m_len;
426 		c_th = (struct tcphdr *)(iph + 1);
427 	} else {
428 		struct ip6_hdr *iph = c->nh;
429 		iph->ip6_plen += mbuf->m_len;
430 		c_th = (struct tcphdr *)(iph + 1);
431 	}
432 	c_th->th_flags |= (th->th_flags & TH_PUSH);
433 	c->th_last = th;
434 	++st->n_merges;
435 
436 	/* Pass packet up now if another segment could overflow the IP
437 	 * length.
438 	 */
439 	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
440 		sfxge_lro_deliver(st, c);
441 }
442 
443 static void
444 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
445 		struct mbuf *mbuf, void *nh, struct tcphdr *th)
446 {
447 	/* Start the chain */
448 	c->mbuf = mbuf;
449 	c->mbuf_tail = c->mbuf;
450 	c->nh = nh;
451 	c->th_last = th;
452 
453 	mbuf->m_pkthdr.len = mbuf->m_len;
454 
455 	/* Mangle header fields for later processing */
456 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
457 		struct ip *iph = nh;
458 		iph->ip_len = ntohs(iph->ip_len);
459 	} else {
460 		struct ip6_hdr *iph = nh;
461 		iph->ip6_plen = ntohs(iph->ip6_plen);
462 	}
463 }
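/*
 * While a connection is being coalesced, ip_len/ip6_plen are kept in host
 * byte order: sfxge_lro_start() converts them with ntohs(), sfxge_lro_merge()
 * then adds each new segment's length directly, and sfxge_lro_deliver()
 * converts back to network order (and rebuilds the IPv4 header checksum)
 * just before handing the merged packet to the stack.
 */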
464 
465 /* Try to merge or otherwise hold or deliver (as appropriate) the
466  * packet buffered for this connection (c->next_buf).  Return a flag
467  * indicating whether the connection is still active for LRO purposes.
468  */
469 static int
470 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
471 {
472 	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
473 	char *eh = c->next_eh;
474 	int data_length, hdr_length, dont_merge;
475 	unsigned th_seq, pkt_length;
476 	struct tcphdr *th;
477 	unsigned now;
478 
479 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
480 		struct ip *iph = c->next_nh;
481 		th = (struct tcphdr *)(iph + 1);
482 		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
483 	} else {
484 		struct ip6_hdr *iph = c->next_nh;
485 		th = (struct tcphdr *)(iph + 1);
486 		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
487 	}
488 
489 	hdr_length = (char *) th + th->th_off * 4 - eh;
490 	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
491 		       hdr_length);
492 	th_seq = ntohl(th->th_seq);
493 	dont_merge = ((data_length <= 0)
494 		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
495 
496 	/* Check for options other than aligned timestamp. */
497 	if (th->th_off != 5) {
498 		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
499 		if (th->th_off == 8 &&
500 		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
501 					(TCPOPT_NOP << 16) |
502 					(TCPOPT_TIMESTAMP << 8) |
503 					TCPOLEN_TIMESTAMP)) {
504 			/* timestamp option -- okay */
505 		} else {
506 			dont_merge = 1;
507 		}
508 	}
509 
510 	if (__predict_false(th_seq != c->next_seq)) {
511 		/* Out-of-order, so start counting again. */
512 		if (c->mbuf != NULL)
513 			sfxge_lro_deliver(&rxq->lro, c);
514 		c->n_in_order_pkts -= lro_loss_packets;
515 		c->next_seq = th_seq + data_length;
516 		++rxq->lro.n_misorder;
517 		goto deliver_buf_out;
518 	}
519 	c->next_seq = th_seq + data_length;
520 
521 	now = ticks;
522 	if (now - c->last_pkt_ticks > lro_idle_ticks) {
523 		++rxq->lro.n_drop_idle;
524 		if (c->mbuf != NULL)
525 			sfxge_lro_deliver(&rxq->lro, c);
526 		sfxge_lro_drop(rxq, c);
527 		return (0);
528 	}
529 	c->last_pkt_ticks = ticks;
530 
531 	if (c->n_in_order_pkts < lro_slow_start_packets) {
532 		/* May be in slow-start, so don't merge. */
533 		++rxq->lro.n_slow_start;
534 		++c->n_in_order_pkts;
535 		goto deliver_buf_out;
536 	}
537 
538 	if (__predict_false(dont_merge)) {
539 		if (c->mbuf != NULL)
540 			sfxge_lro_deliver(&rxq->lro, c);
541 		if (th->th_flags & (TH_FIN | TH_RST)) {
542 			++rxq->lro.n_drop_closed;
543 			sfxge_lro_drop(rxq, c);
544 			return (0);
545 		}
546 		goto deliver_buf_out;
547 	}
548 
549 	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
550 
551 	if (__predict_true(c->mbuf != NULL)) {
552 		/* Remove headers and any padding */
553 		rx_buf->mbuf->m_data += hdr_length;
554 		rx_buf->mbuf->m_len = data_length;
555 
556 		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
557 	} else {
558 		/* Remove any padding */
559 		rx_buf->mbuf->m_len = pkt_length;
560 
561 		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
562 	}
563 
564 	rx_buf->mbuf = NULL;
565 	return (1);
566 
567  deliver_buf_out:
568 	sfxge_rx_deliver(rxq->sc, rx_buf);
569 	return (1);
570 }
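/*
 * To summarise the merge policy above: a buffered segment is coalesced only
 * if it is in sequence, carries payload, has no TCP flags other than PSH,
 * uses at most the aligned timestamp option, and the connection is past the
 * slow-start/loss-recovery packet thresholds.  Anything else is delivered
 * individually, and FIN/RST or prolonged idleness drops the LRO state.
 */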
571 
572 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
573 			       uint16_t l2_id, void *nh, struct tcphdr *th)
574 {
575 	unsigned bucket = conn_hash & st->conns_mask;
576 	struct sfxge_lro_conn *c;
577 
578 	if (st->conns_n[bucket] >= lro_chain_max) {
579 		++st->n_too_many;
580 		return;
581 	}
582 
583 	if (!TAILQ_EMPTY(&st->free_conns)) {
584 		c = TAILQ_FIRST(&st->free_conns);
585 		TAILQ_REMOVE(&st->free_conns, c, link);
586 	} else {
587 		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
588 		if (c == NULL)
589 			return;
590 		c->mbuf = NULL;
591 		c->next_buf.mbuf = NULL;
592 	}
593 
594 	/* Create the connection tracking data */
595 	++st->conns_n[bucket];
596 	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
597 	c->l2_id = l2_id;
598 	c->conn_hash = conn_hash;
599 	c->source = th->th_sport;
600 	c->dest = th->th_dport;
601 	c->n_in_order_pkts = 0;
602 	c->last_pkt_ticks = *(volatile int *)&ticks;
603 	c->delivered = 0;
604 	++st->n_new_stream;
605 	/* NB. We don't initialise c->next_seq, and it doesn't matter what
606 	 * value it has.  Most likely the next packet received for this
607 	 * connection will not match -- no harm done.
608 	 */
609 }
610 
611 /* Process mbuf and decide whether to dispatch it to the stack now or
612  * later.
613  */
614 static void
615 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
616 {
617 	struct sfxge_softc *sc = rxq->sc;
618 	struct mbuf *m = rx_buf->mbuf;
619 	struct ether_header *eh;
620 	struct sfxge_lro_conn *c;
621 	uint16_t l2_id;
622 	uint16_t l3_proto;
623 	void *nh;
624 	struct tcphdr *th;
625 	uint32_t conn_hash;
626 	unsigned bucket;
627 
628 	/* Get the hardware hash */
629 	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
630 				      mtod(m, uint8_t *));
631 
632 	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
633 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
634 		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
635 		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
636 			SFXGE_LRO_L2_ID_VLAN;
637 		l3_proto = veh->evl_proto;
638 		nh = veh + 1;
639 	} else {
640 		l2_id = 0;
641 		l3_proto = eh->ether_type;
642 		nh = eh + 1;
643 	}
644 
645 	/* Check whether this is a suitable packet (unfragmented
646 	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
647 	 * length, and compute a hash if necessary.  If not, return.
648 	 */
649 	if (l3_proto == htons(ETHERTYPE_IP)) {
650 		struct ip *iph = nh;
651 		if ((iph->ip_p - IPPROTO_TCP) |
652 		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
653 		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
654 			goto deliver_now;
655 		th = (struct tcphdr *)(iph + 1);
656 	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
657 		struct ip6_hdr *iph = nh;
658 		if (iph->ip6_nxt != IPPROTO_TCP)
659 			goto deliver_now;
660 		l2_id |= SFXGE_LRO_L2_ID_IPV6;
661 		th = (struct tcphdr *)(iph + 1);
662 	} else {
663 		goto deliver_now;
664 	}
665 
666 	bucket = conn_hash & rxq->lro.conns_mask;
667 
668 	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
669 		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
670 			continue;
671 		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
672 			continue;
673 		if (c->mbuf != NULL) {
674 			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
675 				struct ip *c_iph, *iph = nh;
676 				c_iph = c->nh;
677 				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
678 				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
679 					continue;
680 			} else {
681 				struct ip6_hdr *c_iph, *iph = nh;
682 				c_iph = c->nh;
683 				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
684 				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
685 					continue;
686 			}
687 		}
688 
689 		/* Re-insert at head of list to reduce lookup time. */
690 		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
691 		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
692 
693 		if (c->next_buf.mbuf != NULL) {
694 			if (!sfxge_lro_try_merge(rxq, c))
695 				goto deliver_now;
696 		} else {
697 			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
698 			    active_link);
699 		}
700 		c->next_buf = *rx_buf;
701 		c->next_eh = eh;
702 		c->next_nh = nh;
703 
704 		rx_buf->mbuf = NULL;
705 		rx_buf->flags = EFX_DISCARD;
706 		return;
707 	}
708 
709 	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
710  deliver_now:
711 	sfxge_rx_deliver(sc, rx_buf);
712 }
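/*
 * Connection lookup above is keyed on the hardware Toeplitz hash: the hash
 * selects a bucket, and a candidate matches only if its l2_id (VLAN id plus
 * IPv6 flag), full hash value and TCP ports all agree with the new packet,
 * with the IP addresses checked as well whenever a merge is in progress.
 */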
713 
714 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
715 {
716 	struct sfxge_lro_state *st = &rxq->lro;
717 	struct sfxge_lro_conn *c;
718 	unsigned t;
719 
720 	while (!LIST_EMPTY(&st->active_conns)) {
721 		c = LIST_FIRST(&st->active_conns);
722 		if (!c->delivered && c->mbuf != NULL)
723 			sfxge_lro_deliver(st, c);
724 		if (sfxge_lro_try_merge(rxq, c)) {
725 			if (c->mbuf != NULL)
726 				sfxge_lro_deliver(st, c);
727 			LIST_REMOVE(c, active_link);
728 		}
729 		c->delivered = 0;
730 	}
731 
732 	t = *(volatile int *)&ticks;
733 	if (__predict_false(t != st->last_purge_ticks))
734 		sfxge_lro_purge_idle(rxq, t);
735 }
736 
737 void
738 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
739 {
740 	struct sfxge_softc *sc = rxq->sc;
741 	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
742 	unsigned int index;
743 	struct sfxge_evq *evq;
744 	unsigned int completed;
745 	unsigned int level;
746 	struct mbuf *m;
747 	struct sfxge_rx_sw_desc *prev = NULL;
748 
749 	index = rxq->index;
750 	evq = sc->evq[index];
751 
752 	mtx_assert(&evq->lock, MA_OWNED);
753 
754 	completed = rxq->completed;
755 	while (completed != rxq->pending) {
756 		unsigned int id;
757 		struct sfxge_rx_sw_desc *rx_desc;
758 
759 		id = completed++ & rxq->ptr_mask;
760 		rx_desc = &rxq->queue[id];
761 		m = rx_desc->mbuf;
762 
763 		if (rxq->init_state != SFXGE_RXQ_STARTED)
764 			goto discard;
765 
766 		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
767 			goto discard;
768 
769 		prefetch_read_many(mtod(m, caddr_t));
770 
771 		/* Check for loopback packets */
772 		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
773 		    !(rx_desc->flags & EFX_PKT_IPV6)) {
774 			struct ether_header *etherhp;
775 
776 			/*LINTED*/
777 			etherhp = mtod(m, struct ether_header *);
778 
779 			if (etherhp->ether_type ==
780 			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
781 				EFSYS_PROBE(loopback);
782 
783 				rxq->loopback++;
784 				goto discard;
785 			}
786 		}
787 
788 		/* Pass packet up the stack or into LRO (pipelined) */
789 		if (prev != NULL) {
790 			if (lro_enabled)
791 				sfxge_lro(rxq, prev);
792 			else
793 				sfxge_rx_deliver(sc, prev);
794 		}
795 		prev = rx_desc;
796 		continue;
797 
798 discard:
799 		/* Return the packet to the pool */
800 		m_free(m);
801 		rx_desc->mbuf = NULL;
802 	}
803 	rxq->completed = completed;
804 
805 	level = rxq->added - rxq->completed;
806 
807 	/* Pass last packet up the stack or into LRO */
808 	if (prev != NULL) {
809 		if (lro_enabled)
810 			sfxge_lro(rxq, prev);
811 		else
812 			sfxge_rx_deliver(sc, prev);
813 	}
814 
815 	/*
816 	 * If there are any pending flows and this is the end of the
817 	 * poll then they must be completed.
818 	 */
819 	if (eop)
820 		sfxge_lro_end_of_burst(rxq);
821 
822 	/* Top up the queue if necessary */
823 	if (level < rxq->refill_threshold)
824 		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
825 }
826 
827 static void
828 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
829 {
830 	struct sfxge_rxq *rxq;
831 	struct sfxge_evq *evq;
832 	unsigned int count;
833 
834 	rxq = sc->rxq[index];
835 	evq = sc->evq[index];
836 
837 	mtx_lock(&evq->lock);
838 
839 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
840 	    ("rxq not started"));
841 
842 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
843 
844 	callout_stop(&rxq->refill_callout);
845 
846 again:
847 	rxq->flush_state = SFXGE_FLUSH_PENDING;
848 
849 	/* Flush the receive queue */
850 	efx_rx_qflush(rxq->common);
851 
852 	mtx_unlock(&evq->lock);
853 
854 	count = 0;
855 	do {
856 		/* Spin for 100 ms */
857 		DELAY(100000);
858 
859 		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
860 			break;
861 
862 	} while (++count < 20);
863 
864 	mtx_lock(&evq->lock);
865 
866 	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
867 		goto again;
868 
869 	rxq->flush_state = SFXGE_FLUSH_DONE;
870 
871 	rxq->pending = rxq->added;
872 	sfxge_rx_qcomplete(rxq, B_TRUE);
873 
874 	KASSERT(rxq->completed == rxq->pending,
875 	    ("rxq->completed != rxq->pending"));
876 
877 	rxq->added = 0;
878 	rxq->pending = 0;
879 	rxq->completed = 0;
880 	rxq->loopback = 0;
881 
882 	/* Destroy the common code receive queue. */
883 	efx_rx_qdestroy(rxq->common);
884 
885 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
886 	    EFX_RXQ_NBUFS(sc->rxq_entries));
887 
888 	mtx_unlock(&evq->lock);
889 }
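/*
 * The flush above is given up to 20 polls of 100 ms each (roughly two
 * seconds) to complete; if the event queue reports a failed flush, the
 * request is simply reissued until it succeeds.
 */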
890 
891 static int
892 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
893 {
894 	struct sfxge_rxq *rxq;
895 	efsys_mem_t *esmp;
896 	struct sfxge_evq *evq;
897 	int rc;
898 
899 	rxq = sc->rxq[index];
900 	esmp = &rxq->mem;
901 	evq = sc->evq[index];
902 
903 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
904 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
905 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
906 	    ("evq->init_state != SFXGE_EVQ_STARTED"));
907 
908 	/* Program the buffer table. */
909 	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
910 	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
911 		return (rc);
912 
913 	/* Create the common code receive queue. */
914 	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
915 	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
916 	    &rxq->common)) != 0)
917 		goto fail;
918 
919 	mtx_lock(&evq->lock);
920 
921 	/* Enable the receive queue. */
922 	efx_rx_qenable(rxq->common);
923 
924 	rxq->init_state = SFXGE_RXQ_STARTED;
925 
926 	/* Try to fill the queue from the pool. */
927 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);
928 
929 	mtx_unlock(&evq->lock);
930 
931 	return (0);
932 
933 fail:
934 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
935 	    EFX_RXQ_NBUFS(sc->rxq_entries));
936 	return (rc);
937 }
938 
939 void
940 sfxge_rx_stop(struct sfxge_softc *sc)
941 {
942 	struct sfxge_intr *intr;
943 	int index;
944 
945 	intr = &sc->intr;
946 
947 	/* Stop the receive queue(s) */
948 	index = intr->n_alloc;
949 	while (--index >= 0)
950 		sfxge_rx_qstop(sc, index);
951 
952 	sc->rx_prefix_size = 0;
953 	sc->rx_buffer_size = 0;
954 
955 	efx_rx_fini(sc->enp);
956 }
957 
958 int
959 sfxge_rx_start(struct sfxge_softc *sc)
960 {
961 	struct sfxge_intr *intr;
962 	int index;
963 	int rc;
964 
965 	intr = &sc->intr;
966 
967 	/* Initialize the common code receive module. */
968 	if ((rc = efx_rx_init(sc->enp)) != 0)
969 		return (rc);
970 
971 	/* Calculate the receive packet buffer size. */
972 	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
973 	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
974 			      sc->rx_prefix_size);
975 
976 	/* Select zone for packet buffers */
977 	if (sc->rx_buffer_size <= MCLBYTES)
978 		sc->rx_buffer_zone = zone_clust;
979 	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
980 		sc->rx_buffer_zone = zone_jumbop;
981 	else if (sc->rx_buffer_size <= MJUM9BYTES)
982 		sc->rx_buffer_zone = zone_jumbo9;
983 	else
984 		sc->rx_buffer_zone = zone_jumbo16;
985 
986 	/*
987 	 * Set up the scale table.  Enable all hash types and hash insertion.
988 	 */
989 	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
990 		sc->rx_indir_table[index] = index % sc->intr.n_alloc;
991 	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
992 				       SFXGE_RX_SCALE_MAX)) != 0)
993 		goto fail;
994 	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
995 	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
996 	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
997 
998 	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
999 	    sizeof(toep_key))) != 0)
1000 		goto fail;
1001 
1002 	/* Start the receive queue(s). */
1003 	for (index = 0; index < intr->n_alloc; index++) {
1004 		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1005 			goto fail2;
1006 	}
1007 
1008 	return (0);
1009 
1010 fail2:
1011 	while (--index >= 0)
1012 		sfxge_rx_qstop(sc, index);
1013 
1014 fail:
1015 	efx_rx_fini(sc->enp);
1016 
1017 	return (rc);
1018 }
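/*
 * The RSS setup above spreads flows across the receive queues by filling
 * the indirection table round-robin over the sc->intr.n_alloc queues,
 * enabling IPv4/IPv6 and TCP hashing, and loading the Toeplitz key so that
 * the hash value inserted in the RX prefix is the one the LRO code reads.
 */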
1019 
1020 static void sfxge_lro_init(struct sfxge_rxq *rxq)
1021 {
1022 	struct sfxge_lro_state *st = &rxq->lro;
1023 	unsigned i;
1024 
1025 	st->conns_mask = lro_table_size - 1;
1026 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1027 		("lro_table_size must be a power of 2"));
1028 	st->sc = rxq->sc;
1029 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1030 			   M_SFXGE, M_WAITOK);
1031 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1032 			     M_SFXGE, M_WAITOK);
1033 	for (i = 0; i <= st->conns_mask; ++i) {
1034 		TAILQ_INIT(&st->conns[i]);
1035 		st->conns_n[i] = 0;
1036 	}
1037 	LIST_INIT(&st->active_conns);
1038 	TAILQ_INIT(&st->free_conns);
1039 }
1040 
1041 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1042 {
1043 	struct sfxge_lro_state *st = &rxq->lro;
1044 	struct sfxge_lro_conn *c;
1045 	unsigned i;
1046 
1047 	/* Return cleanly if sfxge_lro_init() has not been called. */
1048 	if (st->conns == NULL)
1049 		return;
1050 
1051 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1052 
1053 	for (i = 0; i <= st->conns_mask; ++i) {
1054 		while (!TAILQ_EMPTY(&st->conns[i])) {
1055 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1056 			sfxge_lro_drop(rxq, c);
1057 		}
1058 	}
1059 
1060 	while (!TAILQ_EMPTY(&st->free_conns)) {
1061 		c = TAILQ_FIRST(&st->free_conns);
1062 		TAILQ_REMOVE(&st->free_conns, c, link);
1063 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1064 		free(c, M_SFXGE);
1065 	}
1066 
1067 	free(st->conns_n, M_SFXGE);
1068 	free(st->conns, M_SFXGE);
1069 	st->conns = NULL;
1070 }
1071 
1072 static void
1073 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1074 {
1075 	struct sfxge_rxq *rxq;
1076 
1077 	rxq = sc->rxq[index];
1078 
1079 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1080 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1081 
1082 	/* Free the context array and the flow table. */
1083 	free(rxq->queue, M_SFXGE);
1084 	sfxge_lro_fini(rxq);
1085 
1086 	/* Release DMA memory. */
1087 	sfxge_dma_free(&rxq->mem);
1088 
1089 	sc->rxq[index] = NULL;
1090 
1091 	free(rxq, M_SFXGE);
1092 }
1093 
1094 static int
1095 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1096 {
1097 	struct sfxge_rxq *rxq;
1098 	struct sfxge_evq *evq;
1099 	efsys_mem_t *esmp;
1100 	int rc;
1101 
1102 	KASSERT(index < sc->intr.n_alloc, ("index >= %d", sc->intr.n_alloc));
1103 
1104 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1105 	rxq->sc = sc;
1106 	rxq->index = index;
1107 	rxq->entries = sc->rxq_entries;
1108 	rxq->ptr_mask = rxq->entries - 1;
1109 	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);
1110 
1111 	sc->rxq[index] = rxq;
1112 	esmp = &rxq->mem;
1113 
1114 	evq = sc->evq[index];
1115 
1116 	/* Allocate and zero DMA space. */
1117 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
1118 		return (rc);
1119 	(void)memset(esmp->esm_base, 0, EFX_RXQ_SIZE(sc->rxq_entries));
1120 
1121 	/* Allocate buffer table entries. */
1122 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
1123 				 &rxq->buf_base_id);
1124 
1125 	/* Allocate the context array and the flow table. */
1126 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
1127 	    M_SFXGE, M_WAITOK | M_ZERO);
1128 	sfxge_lro_init(rxq);
1129 
1130 	callout_init(&rxq->refill_callout, B_TRUE);
1131 
1132 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1133 
1134 	return (0);
1135 }
1136 
1137 static const struct {
1138 	const char *name;
1139 	size_t offset;
1140 } sfxge_rx_stats[] = {
1141 #define	SFXGE_RX_STAT(name, member) \
1142 	{ #name, offsetof(struct sfxge_rxq, member) }
1143 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1144 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1145 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1146 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1147 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1148 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1149 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1150 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
1151 };
1152 
1153 static int
1154 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1155 {
1156 	struct sfxge_softc *sc = arg1;
1157 	unsigned int id = arg2;
1158 	unsigned int sum, index;
1159 
1160 	/* Sum across all RX queues */
1161 	sum = 0;
1162 	for (index = 0; index < sc->intr.n_alloc; index++)
1163 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1164 					 sfxge_rx_stats[id].offset);
1165 
1166 	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1167 }
1168 
1169 static void
1170 sfxge_rx_stat_init(struct sfxge_softc *sc)
1171 {
1172 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1173 	struct sysctl_oid_list *stat_list;
1174 	unsigned int id;
1175 
1176 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1177 
1178 	for (id = 0;
1179 	     id < sizeof(sfxge_rx_stats) / sizeof(sfxge_rx_stats[0]);
1180 	     id++) {
1181 		SYSCTL_ADD_PROC(
1182 			ctx, stat_list,
1183 			OID_AUTO, sfxge_rx_stats[id].name,
1184 			CTLTYPE_UINT|CTLFLAG_RD,
1185 			sc, id, sfxge_rx_stat_handler, "IU",
1186 			"");
1187 	}
1188 }
1189 
1190 void
1191 sfxge_rx_fini(struct sfxge_softc *sc)
1192 {
1193 	struct sfxge_intr *intr;
1194 	int index;
1195 
1196 	intr = &sc->intr;
1197 
1198 	index = intr->n_alloc;
1199 	while (--index >= 0)
1200 		sfxge_rx_qfini(sc, index);
1201 }
1202 
1203 int
1204 sfxge_rx_init(struct sfxge_softc *sc)
1205 {
1206 	struct sfxge_intr *intr;
1207 	int index;
1208 	int rc;
1209 
1210 	if (lro_idle_ticks == 0)
1211 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
1212 
1213 	intr = &sc->intr;
1214 
1215 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1216 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1217 
1218 	/* Initialize the receive queue(s) - one per interrupt. */
1219 	for (index = 0; index < intr->n_alloc; index++) {
1220 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1221 			goto fail;
1222 	}
1223 
1224 	sfxge_rx_stat_init(sc);
1225 
1226 	return (0);
1227 
1228 fail:
1229 	/* Tear down the receive queue(s). */
1230 	while (--index >= 0)
1231 		sfxge_rx_qfini(sc, index);
1232 
1233 	return (rc);
1234 }
1235