/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
#include <sys/syslog.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table. Must be a power of 2. A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain. If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");
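
/*
 * Note: these LRO knobs are CTLFLAG_RDTUN, i.e. read-only sysctls that are
 * only honoured as boot-time tunables. Assuming SFXGE_PARAM() produces the
 * usual "hw.sfxge." prefix, they would be set from loader.conf, for example:
 *
 *	hw.sfxge.lro.table_size="256"
 *	hw.sfxge.lro.chain_max="20"
 *
 * (Illustrative only; the authoritative names come from SFXGE_PARAM() in
 * sfxge.h.)
 */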

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO. The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO. The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
	    &lro_loss_packets, 0,
	    "Number of packets with payload that must arrive in-order "
	    "following loss before a connection is eligible for LRO");

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN	0x4000
#define	SFXGE_LRO_L2_ID_IPV6	0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c)	((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c)	(!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

/* RSS Toeplitz hash key; programmed into the controller in sfxge_rx_start() */
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

static void
sfxge_rx_post_refill(void *arg)
{
	struct sfxge_rxq *rxq = arg;
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	uint16_t magic;

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	magic = SFXGE_MAGIC_RX_QREFILL | index;

	/* This is guaranteed due to the start/stop order of rx and ev */
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq not started"));
	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));
	efx_ev_qpost(evq->common, magic);
}
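
/*
 * Refill flow (summary): when sfxge_rx_qfill() cannot allocate enough mbufs
 * it calls sfxge_rx_schedule_refill() (below), which arms a callout firing
 * sfxge_rx_post_refill() (above). That handler does not touch the ring
 * directly; it posts a SFXGE_MAGIC_RX_QREFILL software event so the refill
 * is re-driven from the event-queue context (presumably handled in the sfxge
 * event code), keeping all ring manipulation under the EVQ lock.
 */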

static void
sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
{
	/* Initially retry after 100 ms, but back off in case of
	 * repeated failures as we probably have to wait for the
	 * administrator to raise the pool limit. */
	if (retrying)
		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
	else
		rxq->refill_delay = hz / 10;

	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
			     sfxge_rx_post_refill, rxq);
}

static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
	struct mb_args args;
	struct mbuf *m;

	/* Allocate mbuf structure */
	args.flags = M_PKTHDR;
	args.type = MT_DATA;
	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);

	/* Allocate (and attach) packet buffer */
	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
		uma_zfree(zone_mbuf, m);
		m = NULL;
	}

	return (m);
}

#define	SFXGE_REFILL_BATCH	64

/* Attempt to post up to 'target' additional receive buffers, without
 * exceeding the ring limit; addresses are pushed to the hardware in
 * batches of SFXGE_REFILL_BATCH descriptors.
 */
static void
sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
{
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	prefetch_read_many(sc->enp);
	prefetch_read_many(rxq->common);

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	rxfill = rxq->added - rxq->completed;
	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));

	if (ntodo == 0)
		return;

	batch = 0;
	mblksize = sc->rx_buffer_size;
	while (ntodo-- > 0) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;
		bus_dma_segment_t seg;
		struct mbuf *m;

		id = (rxq->added + batch) & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));

		rx_desc->flags = EFX_DISCARD;
		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
		if (m == NULL)
			break;
		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
		addr[batch++] = seg.ds_addr;

		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(rxq->common, addr, mblksize, batch,
			    rxq->completed, rxq->added);
			rxq->added += batch;
			batch = 0;
		}
	}

	if (ntodo != 0)
		sfxge_rx_schedule_refill(rxq, retrying);

	if (batch != 0) {
		efx_rx_qpost(rxq->common, addr, mblksize, batch,
		    rxq->completed, rxq->added);
		rxq->added += batch;
	}

	/* Make the descriptors visible to the hardware */
	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
	    BUS_DMASYNC_PREWRITE);

	efx_rx_qpush(rxq->common, rxq->added);
}

void
sfxge_rx_qrefill(struct sfxge_rxq *rxq)
{

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	/* Make sure the queue is full */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
}
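
/*
 * Delivery helpers. __sfxge_rx_deliver() hands a completed mbuf to the
 * stack via if_input(). csum_data is set to 0xffff so that, together with
 * the CSUM_DATA_VALID | CSUM_PSEUDO_HDR flags set by callers when the
 * hardware validated the TCP/UDP checksum, the stack accepts the packet
 * without recomputing the checksum.
 */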

static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
	    (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

	if (rx_desc->flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
						       mtod(m, uint8_t *));
		/* The hash covers a 4-tuple for TCP only */
		M_HASHTYPE_SET(m,
		    (rx_desc->flags & EFX_PKT_IPV4) ?
			((rx_desc->flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_IPV4) :
			((rx_desc->flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_IPV6));
	}
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m,
	    SFXGE_LRO_CONN_IS_TCPIPV4(c) ?
		M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_TCP_IPV6);

	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}
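
/*
 * LRO connection bookkeeping (summary): connections live in the conns[]
 * hash table, indexed by the hardware Toeplitz hash; conns_n[] tracks the
 * bucket depths that lro_chain_max bounds. Connections holding a buffered
 * packet also sit on active_conns until the end of the event burst, and
 * recycled entries are kept on free_conns to avoid malloc() in the fast path.
 */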

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
	    ("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}
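
/*
 * Decision order in sfxge_lro_try_merge() below: out-of-order segments reset
 * the in-order count and are delivered unmerged; idle connections are
 * dropped; connections still inside the lro_slow_start_packets /
 * lro_loss_packets window are delivered unmerged; segments with unexpected
 * flags or options (anything other than a plain aligned timestamp) flush the
 * current chain; everything else is merged, or starts a new chain.
 */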

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf). Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has. Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}
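
/*
 * Note on the lookup loop in sfxge_lro() below: the subtract-and-OR
 * comparisons (e.g. (c->l2_id - l2_id) | (c->conn_hash - conn_hash)) are a
 * deliberate branch-minimising idiom, the same one used by ipv6_addr_cmp()
 * above: the result is non-zero if any field differs, so a single test
 * decides whether the candidate connection matches.
 */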

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
				      mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6). If so, find the TCP header and
	 * length, and compute a hash if necessary. If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;
		if ((iph->ip_p - IPPROTO_TCP) |
		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;
		if (iph->ip6_nxt != IPPROTO_TCP)
			goto deliver_now;
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}
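
/*
 * End-of-burst processing: once the event-queue poll that produced a batch
 * of receive completions finishes, every connection that is still holding a
 * buffered packet gets a final chance to merge or deliver it, so coalescing
 * never adds more than one poll interval of latency. Idle connections are
 * purged at most once per tick.
 */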

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled)
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled)
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}
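
/*
 * Queue stop: sfxge_rx_qstop() below requests a hardware flush and then
 * polls flush_state, which is updated asynchronously by the flush-done /
 * flush-failed callbacks at the top of this file. It waits in 100 ms steps
 * for up to roughly two seconds per attempt and retries the flush if the
 * hardware reports failure.
 */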

static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}
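
/*
 * Start-of-day sequence for the receive path (see sfxge_rx_start() below):
 * size the packet buffers as MTU plus the hardware prefix and pick the
 * smallest mbuf cluster zone that fits, fill the RSS indirection table
 * round-robin across the receive queues (rx_indir_table[i] = i % rxq_count),
 * enable Toeplitz hashing for the IPv4/IPv6 TCP and non-TCP hash types,
 * program the RSS key, and finally start each queue.
 */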

int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table. Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
	    SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
	    ("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

#else

static void
sfxge_lro_init(struct sfxge_rxq *rxq)
{
}

static void
sfxge_lro_fini(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */
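
/*
 * Per-queue teardown mirrors sfxge_rx_qinit() further down: release the
 * software descriptor array and the LRO state, then the DMA memory backing
 * the descriptor ring, and finally the rxq structure itself.
 */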

static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
			    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define	SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
#ifdef SFXGE_LRO
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
#endif
};

static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}

void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
}
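
/*
 * Module-level receive initialisation, sfxge_rx_init() below: validate that
 * the lro_table_size tunable is a power of two, default lro_idle_ticks to
 * roughly 100 ms (hz / 10 + 1) if it was not tuned, and then create one
 * receive queue per allocated interrupt.
 */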

int
sfxge_rx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

#ifdef SFXGE_LRO
	if (!ISP2(lro_table_size)) {
		log(LOG_ERR, "%s=%u must be power of 2",
		    SFXGE_LRO_PARAM(table_size), lro_table_size);
		rc = EINVAL;
		goto fail_lro_table_size;
	}

	if (lro_idle_ticks == 0)
		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
#endif

	intr = &sc->intr;

	sc->rxq_count = intr->n_alloc;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the receive queue(s) - one per interrupt. */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
			goto fail;
	}

	sfxge_rx_stat_init(sc);

	return (0);

fail:
	/* Tear down the receive queue(s). */
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;

#ifdef SFXGE_LRO
fail_lro_table_size:
#endif
	return (rc);
}