xref: /freebsd/sys/dev/sfxge/sfxge_rx.c (revision 01b792f1f535c12a1a14000cf3360ef6c36cee2d)
1 /*-
2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/smp.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/limits.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_vlan_var.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp.h>
48 
49 #include <machine/in_cksum.h>
50 
51 #include "common/efx.h"
52 
53 
54 #include "sfxge.h"
55 #include "sfxge_rx.h"
56 
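/* Refill threshold: top up an RX queue once its fill level drops below
 * 90% of the queue limit (see sfxge_rx_qcomplete()).
 */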
57 #define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)
58 
59 /* Size of the LRO hash table.  Must be a power of 2.  A larger table
60  * means we can accelerate a larger number of streams.
61  */
62 static unsigned lro_table_size = 128;
63 
64 /* Maximum length of a hash chain.  If chains get too long then the lookup
65  * time increases and may exceed the benefit of LRO.
66  */
67 static unsigned lro_chain_max = 20;
68 
69 /* Maximum time (in ticks) that a connection can be idle before its LRO
70  * state is discarded.
71  */
72 static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
73 
74 /* Number of packets with payload that must arrive in-order before a
75  * connection is eligible for LRO.  The idea is we should avoid coalescing
76  * segments when the sender is in slow-start because reducing the ACK rate
77  * can damage performance.
78  */
79 static int lro_slow_start_packets = 2000;
80 
81 /* Number of packets with payload that must arrive in-order following loss
82  * before a connection is eligible for LRO.  The idea is we should avoid
83  * coalescing segments when the sender is recovering from loss, because
84  * reducing the ACK rate can damage performance.
85  */
86 static int lro_loss_packets = 20;
87 
88 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
89 #define	SFXGE_LRO_L2_ID_VLAN 0x4000
90 #define	SFXGE_LRO_L2_ID_IPV6 0x8000
91 #define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
92 #define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
93 
94 /* Compare IPv6 addresses, avoiding conditional branches */
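/* The result is nonzero iff the addresses differ; callers only test it
 * for (in)equality.
 */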
95 static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
96 				   const struct in6_addr *right)
97 {
98 #if LONG_BIT == 64
99 	const uint64_t *left64 = (const uint64_t *)left;
100 	const uint64_t *right64 = (const uint64_t *)right;
101 	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
102 #else
103 	return (left->s6_addr32[0] - right->s6_addr32[0]) |
104 	       (left->s6_addr32[1] - right->s6_addr32[1]) |
105 	       (left->s6_addr32[2] - right->s6_addr32[2]) |
106 	       (left->s6_addr32[3] - right->s6_addr32[3]);
107 #endif
108 }
109 
110 void
111 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
112 {
113 
114 	rxq->flush_state = SFXGE_FLUSH_DONE;
115 }
116 
117 void
118 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
119 {
120 
121 	rxq->flush_state = SFXGE_FLUSH_FAILED;
122 }
123 
124 static uint8_t toep_key[] = {
125 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
126 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
127 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
128 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
129 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
130 };
131 
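/* Callout handler for a deferred refill.  Post a software (magic) event
 * to the queue's event queue so that the refill itself can be performed
 * from the event processing path, with the event queue lock held.
 */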
132 static void
133 sfxge_rx_post_refill(void *arg)
134 {
135 	struct sfxge_rxq *rxq = arg;
136 	struct sfxge_softc *sc;
137 	unsigned int index;
138 	struct sfxge_evq *evq;
139 	uint16_t magic;
140 
141 	sc = rxq->sc;
142 	index = rxq->index;
143 	evq = sc->evq[index];
144 
145 	magic = SFXGE_MAGIC_RX_QREFILL | index;
146 
147 	/* This is guaranteed due to the start/stop order of rx and ev */
148 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
149 	    ("evq not started"));
150 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
151 	    ("rxq not started"));
152 	efx_ev_qpost(evq->common, magic);
153 }
154 
155 static void
156 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
157 {
158 	/* Initially retry after 100 ms, but back off in case of
159 	 * repeated failures as we probably have to wait for the
160 	 * administrator to raise the pool limit. */
161 	if (retrying)
162 		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
163 	else
164 		rxq->refill_delay = hz / 10;
165 
166 	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
167 			     sfxge_rx_post_refill, rxq);
168 }
169 
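/* Allocate an mbuf and attach a receive buffer taken from the cluster
 * zone selected in sfxge_rx_start().
 */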
170 static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
171 {
172 	struct mb_args args;
173 	struct mbuf *m;
174 
175 	/* Allocate mbuf structure */
176 	args.flags = M_PKTHDR;
177 	args.type = MT_DATA;
178 	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
179 
180 	/* Allocate (and attach) packet buffer */
181 	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
182 		uma_zfree(zone_mbuf, m);
183 		m = NULL;
184 	}
185 
186 	return (m);
187 }
188 
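/* Maximum number of DMA addresses accumulated on the stack before they
 * are posted to the hardware in a single efx_rx_qpost() call.
 */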
189 #define	SFXGE_REFILL_BATCH  64
190 
191 static void
192 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
193 {
194 	struct sfxge_softc *sc;
195 	unsigned int index;
196 	struct sfxge_evq *evq;
197 	unsigned int batch;
198 	unsigned int rxfill;
199 	unsigned int mblksize;
200 	int ntodo;
201 	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
202 
203 	sc = rxq->sc;
204 	index = rxq->index;
205 	evq = sc->evq[index];
206 
207 	prefetch_read_many(sc->enp);
208 	prefetch_read_many(rxq->common);
209 
210 	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
211 
212 	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
213 		return;
214 
215 	rxfill = rxq->added - rxq->completed;
216 	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
217 	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
218 	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
219 	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
220 	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));
221 
222 	if (ntodo == 0)
223 		return;
224 
225 	batch = 0;
226 	mblksize = sc->rx_buffer_size;
227 	while (ntodo-- > 0) {
228 		unsigned int id;
229 		struct sfxge_rx_sw_desc *rx_desc;
230 		bus_dma_segment_t seg;
231 		struct mbuf *m;
232 
233 		id = (rxq->added + batch) & rxq->ptr_mask;
234 		rx_desc = &rxq->queue[id];
235 		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
236 
237 		rx_desc->flags = EFX_DISCARD;
238 		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
239 		if (m == NULL)
240 			break;
241 		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
242 		addr[batch++] = seg.ds_addr;
243 
244 		if (batch == SFXGE_REFILL_BATCH) {
245 			efx_rx_qpost(rxq->common, addr, mblksize, batch,
246 			    rxq->completed, rxq->added);
247 			rxq->added += batch;
248 			batch = 0;
249 		}
250 	}
251 
252 	if (ntodo != 0)
253 		sfxge_rx_schedule_refill(rxq, retrying);
254 
255 	if (batch != 0) {
256 		efx_rx_qpost(rxq->common, addr, mblksize, batch,
257 		    rxq->completed, rxq->added);
258 		rxq->added += batch;
259 	}
260 
261 	/* Make the descriptors visible to the hardware */
262 	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
263 			BUS_DMASYNC_PREWRITE);
264 
265 	efx_rx_qpush(rxq->common, rxq->added);
266 }
267 
268 void
269 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
270 {
271 
272 	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
273 		return;
274 
275 	/* Make sure the queue is full */
276 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
277 }
278 
279 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
280 {
281 	struct ifnet *ifp = sc->ifnet;
282 
283 	m->m_pkthdr.rcvif = ifp;
284 	m->m_pkthdr.csum_data = 0xffff;
285 	ifp->if_input(ifp, m);
286 }
287 
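/* Deliver one received packet to the stack: translate the hardware
 * checksum flags, record the Toeplitz flow hash for TCP packets (on
 * multiqueue builds), strip the hardware RX prefix and pass the mbuf
 * to if_input().
 */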
288 static void
289 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
290 {
291 	struct mbuf *m = rx_desc->mbuf;
292 	int csum_flags;
293 
294 	/* Convert checksum flags */
295 	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
296 		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
297 	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
298 		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
299 
300 #ifdef SFXGE_HAVE_MQ
301 	/* The hash covers a 4-tuple for TCP only */
302 	if (rx_desc->flags & EFX_PKT_TCP) {
303 		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
304 						       mtod(m, uint8_t *));
305 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
306 	}
307 #endif
308 	m->m_data += sc->rx_prefix_size;
309 	m->m_len = rx_desc->size - sc->rx_prefix_size;
310 	m->m_pkthdr.len = m->m_len;
311 	m->m_pkthdr.csum_flags = csum_flags;
312 	__sfxge_rx_deliver(sc, rx_desc->mbuf);
313 
314 	rx_desc->flags = EFX_DISCARD;
315 	rx_desc->mbuf = NULL;
316 }
317 
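/* Deliver a coalesced packet chain: restore the IP length field to
 * network byte order, recompute the IPv4 header checksum, copy the
 * latest window, ACK and (where the layouts match) TCP options from the
 * last merged segment, then pass the packet up the stack.
 */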
318 static void
319 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
320 {
321 	struct sfxge_softc *sc = st->sc;
322 	struct mbuf *m = c->mbuf;
323 	struct tcphdr *c_th;
324 	int csum_flags;
325 
326 	KASSERT(m, ("no mbuf to deliver"));
327 
328 	++st->n_bursts;
329 
330 	/* Finish off packet munging and recalculate IP header checksum. */
331 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
332 		struct ip *iph = c->nh;
333 		iph->ip_len = htons(iph->ip_len);
334 		iph->ip_sum = 0;
335 		iph->ip_sum = in_cksum_hdr(iph);
336 		c_th = (struct tcphdr *)(iph + 1);
337 		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
338 			      CSUM_IP_CHECKED | CSUM_IP_VALID);
339 	} else {
340 		struct ip6_hdr *iph = c->nh;
341 		iph->ip6_plen = htons(iph->ip6_plen);
342 		c_th = (struct tcphdr *)(iph + 1);
343 		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
344 	}
345 
346 	c_th->th_win = c->th_last->th_win;
347 	c_th->th_ack = c->th_last->th_ack;
348 	if (c_th->th_off == c->th_last->th_off) {
349 		/* Copy TCP options (take care to avoid going negative). */
350 		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
351 		memcpy(c_th + 1, c->th_last + 1, optlen);
352 	}
353 
354 #ifdef SFXGE_HAVE_MQ
355 	m->m_pkthdr.flowid = c->conn_hash;
356 	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
357 #endif
358 	m->m_pkthdr.csum_flags = csum_flags;
359 	__sfxge_rx_deliver(sc, m);
360 
361 	c->mbuf = NULL;
362 	c->delivered = 1;
363 }
364 
365 /* Drop the given connection, and add it to the free list. */
366 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
367 {
368 	unsigned bucket;
369 
370 	KASSERT(!c->mbuf, ("found orphaned mbuf"));
371 
372 	if (c->next_buf.mbuf != NULL) {
373 		sfxge_rx_deliver(rxq->sc, &c->next_buf);
374 		LIST_REMOVE(c, active_link);
375 	}
376 
377 	bucket = c->conn_hash & rxq->lro.conns_mask;
378 	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
379 	--rxq->lro.conns_n[bucket];
380 	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
381 	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
382 }
383 
384 /* Stop tracking connections that have gone idle in order to keep hash
385  * chains short.
386  */
387 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
388 {
389 	struct sfxge_lro_conn *c;
390 	unsigned i;
391 
392 	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
393 		("found active connections"));
394 
395 	rxq->lro.last_purge_ticks = now;
396 	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
397 		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
398 			continue;
399 
400 		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
401 		if (now - c->last_pkt_ticks > lro_idle_ticks) {
402 			++rxq->lro.n_drop_idle;
403 			sfxge_lro_drop(rxq, c);
404 		}
405 	}
406 }
407 
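/* Append an in-order segment to an existing chain: link the mbuf onto
 * the tail, grow the packet and IP payload lengths, propagate TH_PUSH
 * and deliver early if a further segment could overflow the IP length.
 */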
408 static void
409 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
410 		struct mbuf *mbuf, struct tcphdr *th)
411 {
412 	struct tcphdr *c_th;
413 
414 	/* Tack the new mbuf onto the chain. */
415 	KASSERT(!mbuf->m_next, ("mbuf already chained"));
416 	c->mbuf_tail->m_next = mbuf;
417 	c->mbuf_tail = mbuf;
418 
419 	/* Increase length appropriately */
420 	c->mbuf->m_pkthdr.len += mbuf->m_len;
421 
422 	/* Update the connection state flags */
423 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
424 		struct ip *iph = c->nh;
425 		iph->ip_len += mbuf->m_len;
426 		c_th = (struct tcphdr *)(iph + 1);
427 	} else {
428 		struct ip6_hdr *iph = c->nh;
429 		iph->ip6_plen += mbuf->m_len;
430 		c_th = (struct tcphdr *)(iph + 1);
431 	}
432 	c_th->th_flags |= (th->th_flags & TH_PUSH);
433 	c->th_last = th;
434 	++st->n_merges;
435 
436 	/* Pass packet up now if another segment could overflow the IP
437 	 * length.
438 	 */
439 	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
440 		sfxge_lro_deliver(st, c);
441 }
442 
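/* Begin a new chain with this segment.  The IP length field is converted
 * to host byte order so that merges can add to it directly;
 * sfxge_lro_deliver() converts it back before delivery.
 */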
443 static void
444 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
445 		struct mbuf *mbuf, void *nh, struct tcphdr *th)
446 {
447 	/* Start the chain */
448 	c->mbuf = mbuf;
449 	c->mbuf_tail = c->mbuf;
450 	c->nh = nh;
451 	c->th_last = th;
452 
453 	mbuf->m_pkthdr.len = mbuf->m_len;
454 
455 	/* Mangle header fields for later processing */
456 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
457 		struct ip *iph = nh;
458 		iph->ip_len = ntohs(iph->ip_len);
459 	} else {
460 		struct ip6_hdr *iph = nh;
461 		iph->ip6_plen = ntohs(iph->ip6_plen);
462 	}
463 }
464 
465 /* Try to merge or otherwise hold or deliver (as appropriate) the
466  * packet buffered for this connection (c->next_buf).  Return a flag
467  * indicating whether the connection is still active for LRO purposes.
468  */
469 static int
470 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
471 {
472 	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
473 	char *eh = c->next_eh;
474 	int data_length, hdr_length, dont_merge;
475 	unsigned th_seq, pkt_length;
476 	struct tcphdr *th;
477 	unsigned now;
478 
479 	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
480 		struct ip *iph = c->next_nh;
481 		th = (struct tcphdr *)(iph + 1);
482 		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
483 	} else {
484 		struct ip6_hdr *iph = c->next_nh;
485 		th = (struct tcphdr *)(iph + 1);
486 		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
487 	}
488 
489 	hdr_length = (char *) th + th->th_off * 4 - eh;
490 	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
491 		       hdr_length);
492 	th_seq = ntohl(th->th_seq);
493 	dont_merge = ((data_length <= 0)
494 		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
495 
496 	/* Check for options other than aligned timestamp. */
497 	if (th->th_off != 5) {
498 		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
499 		if (th->th_off == 8 &&
500 		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
501 					(TCPOPT_NOP << 16) |
502 					(TCPOPT_TIMESTAMP << 8) |
503 					TCPOLEN_TIMESTAMP)) {
504 			/* timestamp option -- okay */
505 		} else {
506 			dont_merge = 1;
507 		}
508 	}
509 
510 	if (__predict_false(th_seq != c->next_seq)) {
511 		/* Out-of-order, so start counting again. */
512 		if (c->mbuf != NULL)
513 			sfxge_lro_deliver(&rxq->lro, c);
514 		c->n_in_order_pkts -= lro_loss_packets;
515 		c->next_seq = th_seq + data_length;
516 		++rxq->lro.n_misorder;
517 		goto deliver_buf_out;
518 	}
519 	c->next_seq = th_seq + data_length;
520 
521 	now = ticks;
522 	if (now - c->last_pkt_ticks > lro_idle_ticks) {
523 		++rxq->lro.n_drop_idle;
524 		if (c->mbuf != NULL)
525 			sfxge_lro_deliver(&rxq->lro, c);
526 		sfxge_lro_drop(rxq, c);
527 		return (0);
528 	}
529 	c->last_pkt_ticks = ticks;
530 
531 	if (c->n_in_order_pkts < lro_slow_start_packets) {
532 		/* May be in slow-start, so don't merge. */
533 		++rxq->lro.n_slow_start;
534 		++c->n_in_order_pkts;
535 		goto deliver_buf_out;
536 	}
537 
538 	if (__predict_false(dont_merge)) {
539 		if (c->mbuf != NULL)
540 			sfxge_lro_deliver(&rxq->lro, c);
541 		if (th->th_flags & (TH_FIN | TH_RST)) {
542 			++rxq->lro.n_drop_closed;
543 			sfxge_lro_drop(rxq, c);
544 			return (0);
545 		}
546 		goto deliver_buf_out;
547 	}
548 
549 	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
550 
551 	if (__predict_true(c->mbuf != NULL)) {
552 		/* Remove headers and any padding */
553 		rx_buf->mbuf->m_data += hdr_length;
554 		rx_buf->mbuf->m_len = data_length;
555 
556 		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
557 	} else {
558 		/* Remove any padding */
559 		rx_buf->mbuf->m_len = pkt_length;
560 
561 		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
562 	}
563 
564 	rx_buf->mbuf = NULL;
565 	return (1);
566 
567  deliver_buf_out:
568 	sfxge_rx_deliver(rxq->sc, rx_buf);
569 	return (1);
570 }
571 
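/* Create tracking state for a new connection, reusing an entry from the
 * free list where possible.  If the hash bucket is already at
 * lro_chain_max, or allocation fails, the flow is simply not tracked.
 */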
572 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
573 			       uint16_t l2_id, void *nh, struct tcphdr *th)
574 {
575 	unsigned bucket = conn_hash & st->conns_mask;
576 	struct sfxge_lro_conn *c;
577 
578 	if (st->conns_n[bucket] >= lro_chain_max) {
579 		++st->n_too_many;
580 		return;
581 	}
582 
583 	if (!TAILQ_EMPTY(&st->free_conns)) {
584 		c = TAILQ_FIRST(&st->free_conns);
585 		TAILQ_REMOVE(&st->free_conns, c, link);
586 	} else {
587 		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
588 		if (c == NULL)
589 			return;
590 		c->mbuf = NULL;
591 		c->next_buf.mbuf = NULL;
592 	}
593 
594 	/* Create the connection tracking data */
595 	++st->conns_n[bucket];
596 	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
597 	c->l2_id = l2_id;
598 	c->conn_hash = conn_hash;
599 	c->source = th->th_sport;
600 	c->dest = th->th_dport;
601 	c->n_in_order_pkts = 0;
602 	c->last_pkt_ticks = *(volatile int *)&ticks;
603 	c->delivered = 0;
604 	++st->n_new_stream;
605 	/* NB. We don't initialise c->next_seq, and it doesn't matter what
606 	 * value it has.  Most likely the next packet received for this
607 	 * connection will not match -- no harm done.
608 	 */
609 }
610 
611 /* Process mbuf and decide whether to dispatch it to the stack now or
612  * later.
613  */
614 static void
615 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
616 {
617 	struct sfxge_softc *sc = rxq->sc;
618 	struct mbuf *m = rx_buf->mbuf;
619 	struct ether_header *eh;
620 	struct sfxge_lro_conn *c;
621 	uint16_t l2_id;
622 	uint16_t l3_proto;
623 	void *nh;
624 	struct tcphdr *th;
625 	uint32_t conn_hash;
626 	unsigned bucket;
627 
628 	/* Get the hardware hash */
629 	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
630 				      mtod(m, uint8_t *));
631 
632 	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
633 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
634 		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
635 		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
636 			SFXGE_LRO_L2_ID_VLAN;
637 		l3_proto = veh->evl_proto;
638 		nh = veh + 1;
639 	} else {
640 		l2_id = 0;
641 		l3_proto = eh->ether_type;
642 		nh = eh + 1;
643 	}
644 
645 	/* Check whether this is a suitable packet (unfragmented
646 	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
647 	 * length, and compute a hash if necessary.  If not, return.
648 	 */
649 	if (l3_proto == htons(ETHERTYPE_IP)) {
650 		struct ip *iph = nh;
651 		if ((iph->ip_p - IPPROTO_TCP) |
652 		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
653 		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
654 			goto deliver_now;
655 		th = (struct tcphdr *)(iph + 1);
656 	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
657 		struct ip6_hdr *iph = nh;
658 		if (iph->ip6_nxt != IPPROTO_TCP)
659 			goto deliver_now;
660 		l2_id |= SFXGE_LRO_L2_ID_IPV6;
661 		th = (struct tcphdr *)(iph + 1);
662 	} else {
663 		goto deliver_now;
664 	}
665 
666 	bucket = conn_hash & rxq->lro.conns_mask;
667 
668 	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
669 		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
670 			continue;
671 		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
672 			continue;
673 		if (c->mbuf != NULL) {
674 			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
675 				struct ip *c_iph, *iph = nh;
676 				c_iph = c->nh;
677 				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
678 				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
679 					continue;
680 			} else {
681 				struct ip6_hdr *c_iph, *iph = nh;
682 				c_iph = c->nh;
683 				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
684 				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
685 					continue;
686 			}
687 		}
688 
689 		/* Re-insert at head of list to reduce lookup time. */
690 		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
691 		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
692 
693 		if (c->next_buf.mbuf != NULL) {
694 			if (!sfxge_lro_try_merge(rxq, c))
695 				goto deliver_now;
696 		} else {
697 			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
698 			    active_link);
699 		}
700 		c->next_buf = *rx_buf;
701 		c->next_eh = eh;
702 		c->next_nh = nh;
703 
704 		rx_buf->mbuf = NULL;
705 		rx_buf->flags = EFX_DISCARD;
706 		return;
707 	}
708 
709 	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
710  deliver_now:
711 	sfxge_rx_deliver(sc, rx_buf);
712 }
713 
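/* Called at the end of an event queue poll: flush the segment buffered
 * on each active connection so that coalesced data is not held across
 * polls, and purge idle connections at most once per tick.
 */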
714 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
715 {
716 	struct sfxge_lro_state *st = &rxq->lro;
717 	struct sfxge_lro_conn *c;
718 	unsigned t;
719 
720 	while (!LIST_EMPTY(&st->active_conns)) {
721 		c = LIST_FIRST(&st->active_conns);
722 		if (!c->delivered && c->mbuf != NULL)
723 			sfxge_lro_deliver(st, c);
724 		if (sfxge_lro_try_merge(rxq, c)) {
725 			if (c->mbuf != NULL)
726 				sfxge_lro_deliver(st, c);
727 			LIST_REMOVE(c, active_link);
728 		}
729 		c->delivered = 0;
730 	}
731 
732 	t = *(volatile int *)&ticks;
733 	if (__predict_false(t != st->last_purge_ticks))
734 		sfxge_lro_purge_idle(rxq, t);
735 }
736 
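/* Process the descriptors completed since the last call.  Delivery is
 * pipelined one packet behind the scan, presumably so that the payload
 * prefetch issued for the current descriptor has time to take effect;
 * packets carrying the loopback Ethertype and descriptors flagged for
 * discard are dropped here.
 */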
737 void
738 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
739 {
740 	struct sfxge_softc *sc = rxq->sc;
741 	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
742 	unsigned int index;
743 	struct sfxge_evq *evq;
744 	unsigned int completed;
745 	unsigned int level;
746 	struct mbuf *m;
747 	struct sfxge_rx_sw_desc *prev = NULL;
748 
749 	index = rxq->index;
750 	evq = sc->evq[index];
751 
752 	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
753 
754 	completed = rxq->completed;
755 	while (completed != rxq->pending) {
756 		unsigned int id;
757 		struct sfxge_rx_sw_desc *rx_desc;
758 
759 		id = completed++ & rxq->ptr_mask;
760 		rx_desc = &rxq->queue[id];
761 		m = rx_desc->mbuf;
762 
763 		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
764 			goto discard;
765 
766 		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
767 			goto discard;
768 
769 		prefetch_read_many(mtod(m, caddr_t));
770 
771 		/* Check for loopback packets */
772 		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
773 		    !(rx_desc->flags & EFX_PKT_IPV6)) {
774 			struct ether_header *etherhp;
775 
776 			/*LINTED*/
777 			etherhp = mtod(m, struct ether_header *);
778 
779 			if (etherhp->ether_type ==
780 			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
781 				EFSYS_PROBE(loopback);
782 
783 				rxq->loopback++;
784 				goto discard;
785 			}
786 		}
787 
788 		/* Pass packet up the stack or into LRO (pipelined) */
789 		if (prev != NULL) {
790 			if (lro_enabled)
791 				sfxge_lro(rxq, prev);
792 			else
793 				sfxge_rx_deliver(sc, prev);
794 		}
795 		prev = rx_desc;
796 		continue;
797 
798 discard:
799 		/* Return the packet to the pool */
800 		m_free(m);
801 		rx_desc->mbuf = NULL;
802 	}
803 	rxq->completed = completed;
804 
805 	level = rxq->added - rxq->completed;
806 
807 	/* Pass last packet up the stack or into LRO */
808 	if (prev != NULL) {
809 		if (lro_enabled)
810 			sfxge_lro(rxq, prev);
811 		else
812 			sfxge_rx_deliver(sc, prev);
813 	}
814 
815 	/*
816 	 * If there are any pending flows and this is the end of the
817 	 * poll then they must be completed.
818 	 */
819 	if (eop)
820 		sfxge_lro_end_of_burst(rxq);
821 
822 	/* Top up the queue if necessary */
823 	if (level < rxq->refill_threshold)
824 		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
825 }
826 
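/* Stop a started RX queue: request a hardware flush and poll for up to
 * two seconds (20 x 100ms) for it to complete, retrying if the hardware
 * reports failure, then complete and discard any outstanding descriptors
 * and release the queue's buffer table entries.
 */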
827 static void
828 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
829 {
830 	struct sfxge_rxq *rxq;
831 	struct sfxge_evq *evq;
832 	unsigned int count;
833 
834 	rxq = sc->rxq[index];
835 	evq = sc->evq[index];
836 
837 	SFXGE_EVQ_LOCK(evq);
838 
839 	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
840 	    ("rxq not started"));
841 
842 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
843 
844 	callout_stop(&rxq->refill_callout);
845 
846 again:
847 	rxq->flush_state = SFXGE_FLUSH_PENDING;
848 
849 	/* Flush the receive queue */
850 	efx_rx_qflush(rxq->common);
851 
852 	SFXGE_EVQ_UNLOCK(evq);
853 
854 	count = 0;
855 	do {
856 		/* Spin for 100 ms */
857 		DELAY(100000);
858 
859 		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
860 			break;
861 
862 	} while (++count < 20);
863 
864 	SFXGE_EVQ_LOCK(evq);
865 
866 	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
867 		goto again;
868 
869 	rxq->flush_state = SFXGE_FLUSH_DONE;
870 
871 	rxq->pending = rxq->added;
872 	sfxge_rx_qcomplete(rxq, B_TRUE);
873 
874 	KASSERT(rxq->completed == rxq->pending,
875 	    ("rxq->completed != rxq->pending"));
876 
877 	rxq->added = 0;
878 	rxq->pending = 0;
879 	rxq->completed = 0;
880 	rxq->loopback = 0;
881 
882 	/* Destroy the common code receive queue. */
883 	efx_rx_qdestroy(rxq->common);
884 
885 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
886 	    EFX_RXQ_NBUFS(sc->rxq_entries));
887 
888 	SFXGE_EVQ_UNLOCK(evq);
889 }
890 
891 static int
892 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
893 {
894 	struct sfxge_rxq *rxq;
895 	efsys_mem_t *esmp;
896 	struct sfxge_evq *evq;
897 	int rc;
898 
899 	rxq = sc->rxq[index];
900 	esmp = &rxq->mem;
901 	evq = sc->evq[index];
902 
903 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
904 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
905 	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
906 	    ("evq->init_state != SFXGE_EVQ_STARTED"));
907 
908 	/* Program the buffer table. */
909 	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
910 	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
911 		return (rc);
912 
913 	/* Create the common code receive queue. */
914 	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
915 	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
916 	    &rxq->common)) != 0)
917 		goto fail;
918 
919 	SFXGE_EVQ_LOCK(evq);
920 
921 	/* Enable the receive queue. */
922 	efx_rx_qenable(rxq->common);
923 
924 	rxq->init_state = SFXGE_RXQ_STARTED;
925 
926 	/* Try to fill the queue from the pool. */
927 	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);
928 
929 	SFXGE_EVQ_UNLOCK(evq);
930 
931 	return (0);
932 
933 fail:
934 	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
935 	    EFX_RXQ_NBUFS(sc->rxq_entries));
936 	return (rc);
937 }
938 
939 void
940 sfxge_rx_stop(struct sfxge_softc *sc)
941 {
942 	int index;
943 
944 	/* Stop the receive queue(s) */
945 	index = sc->rxq_count;
946 	while (--index >= 0)
947 		sfxge_rx_qstop(sc, index);
948 
949 	sc->rx_prefix_size = 0;
950 	sc->rx_buffer_size = 0;
951 
952 	efx_rx_fini(sc->enp);
953 }
954 
955 int
956 sfxge_rx_start(struct sfxge_softc *sc)
957 {
958 	struct sfxge_intr *intr;
959 	int index;
960 	int rc;
961 
962 	intr = &sc->intr;
963 
964 	/* Initialize the common code receive module. */
965 	if ((rc = efx_rx_init(sc->enp)) != 0)
966 		return (rc);
967 
968 	/* Calculate the receive packet buffer size. */
969 	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
970 	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
971 			      sc->rx_prefix_size);
972 
973 	/* Select zone for packet buffers */
974 	if (sc->rx_buffer_size <= MCLBYTES)
975 		sc->rx_buffer_zone = zone_clust;
976 	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
977 		sc->rx_buffer_zone = zone_jumbop;
978 	else if (sc->rx_buffer_size <= MJUM9BYTES)
979 		sc->rx_buffer_zone = zone_jumbo9;
980 	else
981 		sc->rx_buffer_zone = zone_jumbo16;
982 
983 	/*
984 	 * Set up the scale table.  Enable all hash types and hash insertion.
985 	 */
986 	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
987 		sc->rx_indir_table[index] = index % sc->rxq_count;
988 	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
989 				       SFXGE_RX_SCALE_MAX)) != 0)
990 		goto fail;
991 	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
992 	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
993 	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
994 
995 	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
996 	    sizeof(toep_key))) != 0)
997 		goto fail;
998 
999 	/* Start the receive queue(s). */
1000 	for (index = 0; index < sc->rxq_count; index++) {
1001 		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1002 			goto fail2;
1003 	}
1004 
1005 	return (0);
1006 
1007 fail2:
1008 	while (--index >= 0)
1009 		sfxge_rx_qstop(sc, index);
1010 
1011 fail:
1012 	efx_rx_fini(sc->enp);
1013 
1014 	return (rc);
1015 }
1016 
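/* Allocate the per-queue LRO hash table and bucket occupancy counters.
 * Connection structures themselves are allocated on demand and recycled
 * through the free list.
 */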
1017 static void sfxge_lro_init(struct sfxge_rxq *rxq)
1018 {
1019 	struct sfxge_lro_state *st = &rxq->lro;
1020 	unsigned i;
1021 
1022 	st->conns_mask = lro_table_size - 1;
1023 	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1024 		("lro_table_size must be a power of 2"));
1025 	st->sc = rxq->sc;
1026 	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1027 			   M_SFXGE, M_WAITOK);
1028 	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1029 			     M_SFXGE, M_WAITOK);
1030 	for (i = 0; i <= st->conns_mask; ++i) {
1031 		TAILQ_INIT(&st->conns[i]);
1032 		st->conns_n[i] = 0;
1033 	}
1034 	LIST_INIT(&st->active_conns);
1035 	TAILQ_INIT(&st->free_conns);
1036 }
1037 
1038 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1039 {
1040 	struct sfxge_lro_state *st = &rxq->lro;
1041 	struct sfxge_lro_conn *c;
1042 	unsigned i;
1043 
1044 	/* Return cleanly if sfxge_lro_init() has not been called. */
1045 	if (st->conns == NULL)
1046 		return;
1047 
1048 	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1049 
1050 	for (i = 0; i <= st->conns_mask; ++i) {
1051 		while (!TAILQ_EMPTY(&st->conns[i])) {
1052 			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1053 			sfxge_lro_drop(rxq, c);
1054 		}
1055 	}
1056 
1057 	while (!TAILQ_EMPTY(&st->free_conns)) {
1058 		c = TAILQ_FIRST(&st->free_conns);
1059 		TAILQ_REMOVE(&st->free_conns, c, link);
1060 		KASSERT(!c->mbuf, ("found orphaned mbuf"));
1061 		free(c, M_SFXGE);
1062 	}
1063 
1064 	free(st->conns_n, M_SFXGE);
1065 	free(st->conns, M_SFXGE);
1066 	st->conns = NULL;
1067 }
1068 
1069 static void
1070 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1071 {
1072 	struct sfxge_rxq *rxq;
1073 
1074 	rxq = sc->rxq[index];
1075 
1076 	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1077 	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1078 
1079 	/* Free the context array and the flow table. */
1080 	free(rxq->queue, M_SFXGE);
1081 	sfxge_lro_fini(rxq);
1082 
1083 	/* Release DMA memory. */
1084 	sfxge_dma_free(&rxq->mem);
1085 
1086 	sc->rxq[index] = NULL;
1087 
1088 	free(rxq, M_SFXGE);
1089 }
1090 
1091 static int
1092 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1093 {
1094 	struct sfxge_rxq *rxq;
1095 	struct sfxge_evq *evq;
1096 	efsys_mem_t *esmp;
1097 	int rc;
1098 
1099 	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));
1100 
1101 	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1102 	rxq->sc = sc;
1103 	rxq->index = index;
1104 	rxq->entries = sc->rxq_entries;
1105 	rxq->ptr_mask = rxq->entries - 1;
1106 	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);
1107 
1108 	sc->rxq[index] = rxq;
1109 	esmp = &rxq->mem;
1110 
1111 	evq = sc->evq[index];
1112 
1113 	/* Allocate and zero DMA space. */
1114 	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
1115 		return (rc);
1116 
1117 	/* Allocate buffer table entries. */
1118 	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
1119 				 &rxq->buf_base_id);
1120 
1121 	/* Allocate the context array and the flow table. */
1122 	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
1123 	    M_SFXGE, M_WAITOK | M_ZERO);
1124 	sfxge_lro_init(rxq);
1125 
1126 	callout_init(&rxq->refill_callout, B_TRUE);
1127 
1128 	rxq->init_state = SFXGE_RXQ_INITIALIZED;
1129 
1130 	return (0);
1131 }
1132 
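/* LRO statistics exported through sysctl.  Each counter is kept per RX
 * queue and summed across all queues by sfxge_rx_stat_handler().
 */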
1133 static const struct {
1134 	const char *name;
1135 	size_t offset;
1136 } sfxge_rx_stats[] = {
1137 #define	SFXGE_RX_STAT(name, member) \
1138 	{ #name, offsetof(struct sfxge_rxq, member) }
1139 	SFXGE_RX_STAT(lro_merges, lro.n_merges),
1140 	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1141 	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1142 	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1143 	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1144 	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1145 	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1146 	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
1147 };
1148 
1149 static int
1150 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1151 {
1152 	struct sfxge_softc *sc = arg1;
1153 	unsigned int id = arg2;
1154 	unsigned int sum, index;
1155 
1156 	/* Sum across all RX queues */
1157 	sum = 0;
1158 	for (index = 0; index < sc->rxq_count; index++)
1159 		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1160 					 sfxge_rx_stats[id].offset);
1161 
1162 	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1163 }
1164 
1165 static void
1166 sfxge_rx_stat_init(struct sfxge_softc *sc)
1167 {
1168 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1169 	struct sysctl_oid_list *stat_list;
1170 	unsigned int id;
1171 
1172 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1173 
1174 	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
1175 		SYSCTL_ADD_PROC(
1176 			ctx, stat_list,
1177 			OID_AUTO, sfxge_rx_stats[id].name,
1178 			CTLTYPE_UINT|CTLFLAG_RD,
1179 			sc, id, sfxge_rx_stat_handler, "IU",
1180 			"");
1181 	}
1182 }
1183 
1184 void
1185 sfxge_rx_fini(struct sfxge_softc *sc)
1186 {
1187 	int index;
1188 
1189 	index = sc->rxq_count;
1190 	while (--index >= 0)
1191 		sfxge_rx_qfini(sc, index);
1192 
1193 	sc->rxq_count = 0;
1194 }
1195 
1196 int
1197 sfxge_rx_init(struct sfxge_softc *sc)
1198 {
1199 	struct sfxge_intr *intr;
1200 	int index;
1201 	int rc;
1202 
1203 	if (lro_idle_ticks == 0)
1204 		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
1205 
1206 	intr = &sc->intr;
1207 
1208 	sc->rxq_count = intr->n_alloc;
1209 
1210 	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1211 	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1212 
1213 	/* Initialize the receive queue(s) - one per interrupt. */
1214 	for (index = 0; index < sc->rxq_count; index++) {
1215 		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1216 			goto fail;
1217 	}
1218 
1219 	sfxge_rx_stat_init(sc);
1220 
1221 	return (0);
1222 
1223 fail:
1224 	/* Tear down the receive queue(s). */
1225 	while (--index >= 0)
1226 		sfxge_rx_qfini(sc, index);
1227 
1228 	sc->rxq_count = 0;
1229 	return (rc);
1230 }
1231