1 /*- 2 * Copyright (c) 2010-2011 Solarflare Communications, Inc. 3 * All rights reserved. 4 * 5 * This software was developed in part by Philip Paeps under contract for 6 * Solarflare Communications, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
#include <sys/syslog.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

/* Refill the RX ring once occupancy drops below 90% of the ring limit. */
#define RX_REFILL_THRESHOLD(_entries) (EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

/* sysctl/tunable node: hw.sfxge.lro.* */
SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

/* Build the loader-tunable name for an LRO parameter. */
#define SFXGE_LRO_PARAM(_param) SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before it's LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 *
 * NOTE(review): declared as int but exported via SYSCTL_UINT; the signed
 * type matters because sfxge_lro_try_merge() compares it against a counter
 * that can go negative -- confirm before changing to unsigned.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
111 */ 112 static int lro_loss_packets = 20; 113 TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets); 114 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN, 115 &lro_loss_packets, 0, 116 "Number of packets with payload that must arrive in-order " 117 "following loss before a connection is eligible for LRO"); 118 119 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */ 120 #define SFXGE_LRO_L2_ID_VLAN 0x4000 121 #define SFXGE_LRO_L2_ID_IPV6 0x8000 122 #define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN) 123 #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6)) 124 125 /* Compare IPv6 addresses, avoiding conditional branches */ 126 static unsigned long ipv6_addr_cmp(const struct in6_addr *left, 127 const struct in6_addr *right) 128 { 129 #if LONG_BIT == 64 130 const uint64_t *left64 = (const uint64_t *)left; 131 const uint64_t *right64 = (const uint64_t *)right; 132 return (left64[0] - right64[0]) | (left64[1] - right64[1]); 133 #else 134 return (left->s6_addr32[0] - right->s6_addr32[0]) | 135 (left->s6_addr32[1] - right->s6_addr32[1]) | 136 (left->s6_addr32[2] - right->s6_addr32[2]) | 137 (left->s6_addr32[3] - right->s6_addr32[3]); 138 #endif 139 } 140 141 #endif /* SFXGE_LRO */ 142 143 void 144 sfxge_rx_qflush_done(struct sfxge_rxq *rxq) 145 { 146 147 rxq->flush_state = SFXGE_FLUSH_DONE; 148 } 149 150 void 151 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq) 152 { 153 154 rxq->flush_state = SFXGE_FLUSH_FAILED; 155 } 156 157 static uint8_t toep_key[] = { 158 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 159 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 160 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 161 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 162 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 163 }; 164 165 static void 166 sfxge_rx_post_refill(void *arg) 167 { 168 struct sfxge_rxq *rxq = arg; 169 struct sfxge_softc *sc; 170 unsigned int index; 171 struct 
sfxge_evq *evq; 172 uint16_t magic; 173 174 sc = rxq->sc; 175 index = rxq->index; 176 evq = sc->evq[index]; 177 178 magic = SFXGE_MAGIC_RX_QREFILL | index; 179 180 /* This is guaranteed due to the start/stop order of rx and ev */ 181 KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 182 ("evq not started")); 183 KASSERT(rxq->init_state == SFXGE_RXQ_STARTED, 184 ("rxq not started")); 185 efx_ev_qpost(evq->common, magic); 186 } 187 188 static void 189 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) 190 { 191 /* Initially retry after 100 ms, but back off in case of 192 * repeated failures as we probably have to wait for the 193 * administrator to raise the pool limit. */ 194 if (retrying) 195 rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz); 196 else 197 rxq->refill_delay = hz / 10; 198 199 callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay, 200 sfxge_rx_post_refill, rxq); 201 } 202 203 static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) 204 { 205 struct mb_args args; 206 struct mbuf *m; 207 208 /* Allocate mbuf structure */ 209 args.flags = M_PKTHDR; 210 args.type = MT_DATA; 211 m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT); 212 213 /* Allocate (and attach) packet buffer */ 214 if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) { 215 uma_zfree(zone_mbuf, m); 216 m = NULL; 217 } 218 219 return (m); 220 } 221 222 #define SFXGE_REFILL_BATCH 64 223 224 static void 225 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) 226 { 227 struct sfxge_softc *sc; 228 unsigned int index; 229 struct sfxge_evq *evq; 230 unsigned int batch; 231 unsigned int rxfill; 232 unsigned int mblksize; 233 int ntodo; 234 efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; 235 236 sc = rxq->sc; 237 index = rxq->index; 238 evq = sc->evq[index]; 239 240 prefetch_read_many(sc->enp); 241 prefetch_read_many(rxq->common); 242 243 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 244 245 if (__predict_false(rxq->init_state != 
SFXGE_RXQ_STARTED)) 246 return; 247 248 rxfill = rxq->added - rxq->completed; 249 KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries), 250 ("rxfill > EFX_RXQ_LIMIT(rxq->entries)")); 251 ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target); 252 KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries), 253 ("ntodo > EFX_RQX_LIMIT(rxq->entries)")); 254 255 if (ntodo == 0) 256 return; 257 258 batch = 0; 259 mblksize = sc->rx_buffer_size; 260 while (ntodo-- > 0) { 261 unsigned int id; 262 struct sfxge_rx_sw_desc *rx_desc; 263 bus_dma_segment_t seg; 264 struct mbuf *m; 265 266 id = (rxq->added + batch) & rxq->ptr_mask; 267 rx_desc = &rxq->queue[id]; 268 KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); 269 270 rx_desc->flags = EFX_DISCARD; 271 m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); 272 if (m == NULL) 273 break; 274 sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); 275 addr[batch++] = seg.ds_addr; 276 277 if (batch == SFXGE_REFILL_BATCH) { 278 efx_rx_qpost(rxq->common, addr, mblksize, batch, 279 rxq->completed, rxq->added); 280 rxq->added += batch; 281 batch = 0; 282 } 283 } 284 285 if (ntodo != 0) 286 sfxge_rx_schedule_refill(rxq, retrying); 287 288 if (batch != 0) { 289 efx_rx_qpost(rxq->common, addr, mblksize, batch, 290 rxq->completed, rxq->added); 291 rxq->added += batch; 292 } 293 294 /* Make the descriptors visible to the hardware */ 295 bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, 296 BUS_DMASYNC_PREWRITE); 297 298 efx_rx_qpush(rxq->common, rxq->added); 299 } 300 301 void 302 sfxge_rx_qrefill(struct sfxge_rxq *rxq) 303 { 304 305 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 306 return; 307 308 /* Make sure the queue is full */ 309 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE); 310 } 311 312 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m) 313 { 314 struct ifnet *ifp = sc->ifnet; 315 316 m->m_pkthdr.rcvif = ifp; 317 m->m_pkthdr.csum_data = 0xffff; 318 ifp->if_input(ifp, m); 319 } 320 321 static 
void 322 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc) 323 { 324 struct mbuf *m = rx_desc->mbuf; 325 int csum_flags; 326 327 /* Convert checksum flags */ 328 csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ? 329 (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0; 330 if (rx_desc->flags & EFX_CKSUM_TCPUDP) 331 csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 332 333 #ifdef SFXGE_HAVE_MQ 334 /* The hash covers a 4-tuple for TCP only */ 335 if (rx_desc->flags & EFX_PKT_TCP) { 336 m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ, 337 mtod(m, uint8_t *)); 338 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 339 } 340 #endif 341 m->m_data += sc->rx_prefix_size; 342 m->m_len = rx_desc->size - sc->rx_prefix_size; 343 m->m_pkthdr.len = m->m_len; 344 m->m_pkthdr.csum_flags = csum_flags; 345 __sfxge_rx_deliver(sc, rx_desc->mbuf); 346 347 rx_desc->flags = EFX_DISCARD; 348 rx_desc->mbuf = NULL; 349 } 350 351 #ifdef SFXGE_LRO 352 353 static void 354 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c) 355 { 356 struct sfxge_softc *sc = st->sc; 357 struct mbuf *m = c->mbuf; 358 struct tcphdr *c_th; 359 int csum_flags; 360 361 KASSERT(m, ("no mbuf to deliver")); 362 363 ++st->n_bursts; 364 365 /* Finish off packet munging and recalculate IP header checksum. */ 366 if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 367 struct ip *iph = c->nh; 368 iph->ip_len = htons(iph->ip_len); 369 iph->ip_sum = 0; 370 iph->ip_sum = in_cksum_hdr(iph); 371 c_th = (struct tcphdr *)(iph + 1); 372 csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | 373 CSUM_IP_CHECKED | CSUM_IP_VALID); 374 } else { 375 struct ip6_hdr *iph = c->nh; 376 iph->ip6_plen = htons(iph->ip6_plen); 377 c_th = (struct tcphdr *)(iph + 1); 378 csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 379 } 380 381 c_th->th_win = c->th_last->th_win; 382 c_th->th_ack = c->th_last->th_ack; 383 if (c_th->th_off == c->th_last->th_off) { 384 /* Copy TCP options (take care to avoid going negative). 
*/ 385 int optlen = ((c_th->th_off - 5) & 0xf) << 2u; 386 memcpy(c_th + 1, c->th_last + 1, optlen); 387 } 388 389 #ifdef SFXGE_HAVE_MQ 390 m->m_pkthdr.flowid = c->conn_hash; 391 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 392 #endif 393 m->m_pkthdr.csum_flags = csum_flags; 394 __sfxge_rx_deliver(sc, m); 395 396 c->mbuf = NULL; 397 c->delivered = 1; 398 } 399 400 /* Drop the given connection, and add it to the free list. */ 401 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c) 402 { 403 unsigned bucket; 404 405 KASSERT(!c->mbuf, ("found orphaned mbuf")); 406 407 if (c->next_buf.mbuf != NULL) { 408 sfxge_rx_deliver(rxq->sc, &c->next_buf); 409 LIST_REMOVE(c, active_link); 410 } 411 412 bucket = c->conn_hash & rxq->lro.conns_mask; 413 KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong")); 414 --rxq->lro.conns_n[bucket]; 415 TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link); 416 TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link); 417 } 418 419 /* Stop tracking connections that have gone idle in order to keep hash 420 * chains short. 421 */ 422 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now) 423 { 424 struct sfxge_lro_conn *c; 425 unsigned i; 426 427 KASSERT(LIST_EMPTY(&rxq->lro.active_conns), 428 ("found active connections")); 429 430 rxq->lro.last_purge_ticks = now; 431 for (i = 0; i <= rxq->lro.conns_mask; ++i) { 432 if (TAILQ_EMPTY(&rxq->lro.conns[i])) 433 continue; 434 435 c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq); 436 if (now - c->last_pkt_ticks > lro_idle_ticks) { 437 ++rxq->lro.n_drop_idle; 438 sfxge_lro_drop(rxq, c); 439 } 440 } 441 } 442 443 static void 444 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 445 struct mbuf *mbuf, struct tcphdr *th) 446 { 447 struct tcphdr *c_th; 448 449 /* Tack the new mbuf onto the chain. 
*/ 450 KASSERT(!mbuf->m_next, ("mbuf already chained")); 451 c->mbuf_tail->m_next = mbuf; 452 c->mbuf_tail = mbuf; 453 454 /* Increase length appropriately */ 455 c->mbuf->m_pkthdr.len += mbuf->m_len; 456 457 /* Update the connection state flags */ 458 if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 459 struct ip *iph = c->nh; 460 iph->ip_len += mbuf->m_len; 461 c_th = (struct tcphdr *)(iph + 1); 462 } else { 463 struct ip6_hdr *iph = c->nh; 464 iph->ip6_plen += mbuf->m_len; 465 c_th = (struct tcphdr *)(iph + 1); 466 } 467 c_th->th_flags |= (th->th_flags & TH_PUSH); 468 c->th_last = th; 469 ++st->n_merges; 470 471 /* Pass packet up now if another segment could overflow the IP 472 * length. 473 */ 474 if (c->mbuf->m_pkthdr.len > 65536 - 9200) 475 sfxge_lro_deliver(st, c); 476 } 477 478 static void 479 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 480 struct mbuf *mbuf, void *nh, struct tcphdr *th) 481 { 482 /* Start the chain */ 483 c->mbuf = mbuf; 484 c->mbuf_tail = c->mbuf; 485 c->nh = nh; 486 c->th_last = th; 487 488 mbuf->m_pkthdr.len = mbuf->m_len; 489 490 /* Mangle header fields for later processing */ 491 if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 492 struct ip *iph = nh; 493 iph->ip_len = ntohs(iph->ip_len); 494 } else { 495 struct ip6_hdr *iph = nh; 496 iph->ip6_plen = ntohs(iph->ip6_plen); 497 } 498 } 499 500 /* Try to merge or otherwise hold or deliver (as appropriate) the 501 * packet buffered for this connection (c->next_buf). Return a flag 502 * indicating whether the connection is still active for LRO purposes. 
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	/* Locate the TCP header and compute the on-wire packet length. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	/* No payload, or any of URG/SYN/RST/FIN, disqualifies merging. */
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	/* Strip the hardware RX prefix before merging/starting. */
	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	/* Ownership of the mbuf has passed to the LRO chain. */
	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

/* Allocate (or recycle from the free list) a connection-tracking entry
 * for a new TCP flow and insert it at the head of its hash bucket.
 * Silently does nothing if the chain is already at lro_chain_max or
 * allocation fails.
 */
static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
				      mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;
		/* Branch-free test: TCP, no IP options, unfragmented. */
		if ((iph->ip_p - IPPROTO_TCP) |
		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;
		if (iph->ip6_nxt != IPPROTO_TCP)
			goto deliver_now;
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			/* Guard against hash collisions by comparing the
			 * addresses of the packet being merged. */
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		/* Buffer this packet until the next one arrives or the
		 * burst ends. */
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

/* End of a poll burst: flush all active connections' held buffers and
 * coalesced packets, and occasionally purge idle connections.
 */
static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	/* At most one idle purge per tick. */
	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else /* !SFXGE_LRO */

/* LRO disabled at compile time: no-op stub. */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

/* LRO disabled at compile time: no-op stub. */
static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif /* SFXGE_LRO */

/* Process completed RX descriptors: drop discards and loopback frames,
 * pass good packets to LRO or directly to the stack (pipelined so the
 * next descriptor can be prefetched), then refill the ring if it has
 * drained below the threshold.  Called with the event-queue lock held.
 */
void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled)
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled)
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

/* Stop one RX queue: cancel the refill callout, flush the queue
 * (spinning up to 2 s per attempt and retrying indefinitely on flush
 * failure), drain remaining descriptors and tear down the common-code
 * queue and its buffer table entries.
 */
static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	/* Complete (and discard) everything still outstanding. */
	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

/* Start one RX queue: program the buffer table, create and enable the
 * common-code queue, and fill the ring.  Returns 0 or an errno.
 */
static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

/* Stop all RX queues and shut down the common-code receive module. */
void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}

/* Start the receive path: initialise the common-code RX module, size
 * the packet buffers and pick a UMA zone, program the RSS indirection
 * table and Toeplitz key, then start every RX queue.  Returns 0 or an
 * errno, unwinding any queues already started on failure.
 */
int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
	    SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

/* Allocate and initialise the per-queue LRO hash table and lists. */
static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
	    ("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

/* Drop every tracked connection and free the LRO hash table. */
static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

#else

/* LRO disabled at compile time: no-op stub. */
static void
sfxge_lro_init(struct sfxge_rxq *rxq)
{
}

/* LRO disabled at compile time: no-op stub. */
static void
sfxge_lro_fini(struct sfxge_rxq *rxq)
{
}

#endif /* SFXGE_LRO */

/* Release all resources owned by one RX queue (inverse of
 * sfxge_rx_qinit()).
 */
static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

/* Allocate and initialise one RX queue: softc bookkeeping, DMA ring
 * memory, buffer table entries, descriptor context array, LRO state and
 * the refill callout.  Returns 0 or an errno.
 */
static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
			    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

/* Map of sysctl stat names to their offsets within struct sfxge_rxq. */
static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
#ifdef SFXGE_LRO
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
#endif
};

/* sysctl handler: report the stat identified by arg2, summed over all
 * RX queues.
 */
static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

/* Register one read-only sysctl per entry in sfxge_rx_stats[]. */
static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}

/* Tear down every RX queue (definition continues beyond this chunk). */
void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
sfxge_rx_qfini(sc, index); 1259 1260 sc->rxq_count = 0; 1261 } 1262 1263 int 1264 sfxge_rx_init(struct sfxge_softc *sc) 1265 { 1266 struct sfxge_intr *intr; 1267 int index; 1268 int rc; 1269 1270 #ifdef SFXGE_LRO 1271 if (!ISP2(lro_table_size)) { 1272 log(LOG_ERR, "%s=%u must be power of 2", 1273 SFXGE_LRO_PARAM(table_size), lro_table_size); 1274 rc = EINVAL; 1275 goto fail_lro_table_size; 1276 } 1277 1278 if (lro_idle_ticks == 0) 1279 lro_idle_ticks = hz / 10 + 1; /* 100 ms */ 1280 #endif 1281 1282 intr = &sc->intr; 1283 1284 sc->rxq_count = intr->n_alloc; 1285 1286 KASSERT(intr->state == SFXGE_INTR_INITIALIZED, 1287 ("intr->state != SFXGE_INTR_INITIALIZED")); 1288 1289 /* Initialize the receive queue(s) - one per interrupt. */ 1290 for (index = 0; index < sc->rxq_count; index++) { 1291 if ((rc = sfxge_rx_qinit(sc, index)) != 0) 1292 goto fail; 1293 } 1294 1295 sfxge_rx_stat_init(sc); 1296 1297 return (0); 1298 1299 fail: 1300 /* Tear down the receive queue(s). */ 1301 while (--index >= 0) 1302 sfxge_rx_qfini(sc, index); 1303 1304 sc->rxq_count = 0; 1305 1306 #ifdef SFXGE_LRO 1307 fail_lro_table_size: 1308 #endif 1309 return (rc); 1310 } 1311