1e948693eSPhilip Paeps /*- 2e948693eSPhilip Paeps * Copyright (c) 2010-2011 Solarflare Communications, Inc. 3e948693eSPhilip Paeps * All rights reserved. 4e948693eSPhilip Paeps * 5e948693eSPhilip Paeps * This software was developed in part by Philip Paeps under contract for 6e948693eSPhilip Paeps * Solarflare Communications, Inc. 7e948693eSPhilip Paeps * 8e948693eSPhilip Paeps * Redistribution and use in source and binary forms, with or without 9e948693eSPhilip Paeps * modification, are permitted provided that the following conditions 10e948693eSPhilip Paeps * are met: 11e948693eSPhilip Paeps * 1. Redistributions of source code must retain the above copyright 12e948693eSPhilip Paeps * notice, this list of conditions and the following disclaimer. 13e948693eSPhilip Paeps * 2. Redistributions in binary form must reproduce the above copyright 14e948693eSPhilip Paeps * notice, this list of conditions and the following disclaimer in the 15e948693eSPhilip Paeps * documentation and/or other materials provided with the distribution. 16e948693eSPhilip Paeps * 17e948693eSPhilip Paeps * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18e948693eSPhilip Paeps * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19e948693eSPhilip Paeps * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20e948693eSPhilip Paeps * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21e948693eSPhilip Paeps * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22e948693eSPhilip Paeps * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23e948693eSPhilip Paeps * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24e948693eSPhilip Paeps * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25e948693eSPhilip Paeps * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26e948693eSPhilip Paeps * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27e948693eSPhilip Paeps * SUCH DAMAGE. 28e948693eSPhilip Paeps */ 29e948693eSPhilip Paeps 30e948693eSPhilip Paeps #include <sys/cdefs.h> 31e948693eSPhilip Paeps __FBSDID("$FreeBSD$"); 32e948693eSPhilip Paeps 33e948693eSPhilip Paeps #include <sys/types.h> 34e948693eSPhilip Paeps #include <sys/mbuf.h> 35e948693eSPhilip Paeps #include <sys/smp.h> 36e948693eSPhilip Paeps #include <sys/socket.h> 37e948693eSPhilip Paeps #include <sys/sysctl.h> 38e948693eSPhilip Paeps #include <sys/limits.h> 39245d1576SAndrew Rybchenko #include <sys/syslog.h> 40e948693eSPhilip Paeps 41e948693eSPhilip Paeps #include <net/ethernet.h> 42e948693eSPhilip Paeps #include <net/if.h> 43e948693eSPhilip Paeps #include <net/if_vlan_var.h> 44e948693eSPhilip Paeps 45e948693eSPhilip Paeps #include <netinet/in.h> 46e948693eSPhilip Paeps #include <netinet/ip.h> 47e948693eSPhilip Paeps #include <netinet/ip6.h> 48e948693eSPhilip Paeps #include <netinet/tcp.h> 49e948693eSPhilip Paeps 50e948693eSPhilip Paeps #include <machine/in_cksum.h> 51e948693eSPhilip Paeps 52e948693eSPhilip Paeps #include "common/efx.h" 53e948693eSPhilip Paeps 54e948693eSPhilip Paeps 55e948693eSPhilip Paeps #include "sfxge.h" 56e948693eSPhilip Paeps #include "sfxge_rx.h" 57e948693eSPhilip Paeps 58385b1d8eSGeorge V. Neville-Neil #define RX_REFILL_THRESHOLD(_entries) (EFX_RXQ_LIMIT(_entries) * 9 / 10) 59e948693eSPhilip Paeps 6018daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO 6118daa0eeSAndrew Rybchenko 62245d1576SAndrew Rybchenko SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL, 63245d1576SAndrew Rybchenko "Large receive offload (LRO) parameters"); 64245d1576SAndrew Rybchenko 65245d1576SAndrew Rybchenko #define SFXGE_LRO_PARAM(_param) SFXGE_PARAM(lro._param) 66245d1576SAndrew Rybchenko 67e948693eSPhilip Paeps /* Size of the LRO hash table. Must be a power of 2. A larger table 68e948693eSPhilip Paeps * means we can accelerate a larger number of streams. 69e948693eSPhilip Paeps */ 70e948693eSPhilip Paeps static unsigned lro_table_size = 128; 71245d1576SAndrew Rybchenko TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size); 72245d1576SAndrew Rybchenko SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN, 73245d1576SAndrew Rybchenko &lro_table_size, 0, 74245d1576SAndrew Rybchenko "Size of the LRO hash table (must be a power of 2)"); 75e948693eSPhilip Paeps 76e948693eSPhilip Paeps /* Maximum length of a hash chain. If chains get too long then the lookup 77e948693eSPhilip Paeps * time increases and may exceed the benefit of LRO. 78e948693eSPhilip Paeps */ 79e948693eSPhilip Paeps static unsigned lro_chain_max = 20; 80245d1576SAndrew Rybchenko TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max); 81245d1576SAndrew Rybchenko SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN, 82245d1576SAndrew Rybchenko &lro_chain_max, 0, 83245d1576SAndrew Rybchenko "The maximum length of a hash chain"); 84e948693eSPhilip Paeps 85e948693eSPhilip Paeps /* Maximum time (in ticks) that a connection can be idle before it's LRO 86e948693eSPhilip Paeps * state is discarded. 87e948693eSPhilip Paeps */ 88e948693eSPhilip Paeps static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */ 89245d1576SAndrew Rybchenko TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks); 90245d1576SAndrew Rybchenko SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN, 91245d1576SAndrew Rybchenko &lro_idle_ticks, 0, 92245d1576SAndrew Rybchenko "The maximum time (in ticks) that a connection can be idle " 93245d1576SAndrew Rybchenko "before it's LRO state is discarded"); 94e948693eSPhilip Paeps 95e948693eSPhilip Paeps /* Number of packets with payload that must arrive in-order before a 96e948693eSPhilip Paeps * connection is eligible for LRO. The idea is we should avoid coalescing 97e948693eSPhilip Paeps * segments when the sender is in slow-start because reducing the ACK rate 98e948693eSPhilip Paeps * can damage performance. 99e948693eSPhilip Paeps */ 100e948693eSPhilip Paeps static int lro_slow_start_packets = 2000; 101245d1576SAndrew Rybchenko TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets); 102245d1576SAndrew Rybchenko SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN, 103245d1576SAndrew Rybchenko &lro_slow_start_packets, 0, 104245d1576SAndrew Rybchenko "Number of packets with payload that must arrive in-order before " 105245d1576SAndrew Rybchenko "a connection is eligible for LRO"); 106e948693eSPhilip Paeps 107e948693eSPhilip Paeps /* Number of packets with payload that must arrive in-order following loss 108e948693eSPhilip Paeps * before a connection is eligible for LRO. The idea is we should avoid 109e948693eSPhilip Paeps * coalescing segments when the sender is recovering from loss, because 110e948693eSPhilip Paeps * reducing the ACK rate can damage performance. 111e948693eSPhilip Paeps */ 112e948693eSPhilip Paeps static int lro_loss_packets = 20; 113245d1576SAndrew Rybchenko TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets); 114245d1576SAndrew Rybchenko SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN, 115245d1576SAndrew Rybchenko &lro_loss_packets, 0, 116245d1576SAndrew Rybchenko "Number of packets with payload that must arrive in-order " 117245d1576SAndrew Rybchenko "following loss before a connection is eligible for LRO"); 118e948693eSPhilip Paeps 119e948693eSPhilip Paeps /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */ 120e948693eSPhilip Paeps #define SFXGE_LRO_L2_ID_VLAN 0x4000 121e948693eSPhilip Paeps #define SFXGE_LRO_L2_ID_IPV6 0x8000 122e948693eSPhilip Paeps #define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN) 123e948693eSPhilip Paeps #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6)) 124e948693eSPhilip Paeps 125e948693eSPhilip Paeps /* Compare IPv6 addresses, avoiding conditional branches */ 1260b28bbdcSAndrew Rybchenko static unsigned long ipv6_addr_cmp(const struct in6_addr *left, 127e948693eSPhilip Paeps const struct in6_addr *right) 128e948693eSPhilip Paeps { 129e948693eSPhilip Paeps #if LONG_BIT == 64 130e948693eSPhilip Paeps const uint64_t *left64 = (const uint64_t *)left; 131e948693eSPhilip Paeps const uint64_t *right64 = (const uint64_t *)right; 132e948693eSPhilip Paeps return (left64[0] - right64[0]) | (left64[1] - right64[1]); 133e948693eSPhilip Paeps #else 134e948693eSPhilip Paeps return (left->s6_addr32[0] - right->s6_addr32[0]) | 135e948693eSPhilip Paeps (left->s6_addr32[1] - right->s6_addr32[1]) | 136e948693eSPhilip Paeps (left->s6_addr32[2] - right->s6_addr32[2]) | 137e948693eSPhilip Paeps (left->s6_addr32[3] - right->s6_addr32[3]); 138e948693eSPhilip Paeps #endif 139e948693eSPhilip Paeps } 140e948693eSPhilip Paeps 14118daa0eeSAndrew Rybchenko #endif /* SFXGE_LRO */ 14218daa0eeSAndrew Rybchenko 143e948693eSPhilip Paeps void 144e948693eSPhilip Paeps sfxge_rx_qflush_done(struct sfxge_rxq *rxq) 145e948693eSPhilip Paeps { 146e948693eSPhilip Paeps 147e948693eSPhilip Paeps rxq->flush_state = SFXGE_FLUSH_DONE; 148e948693eSPhilip Paeps } 149e948693eSPhilip Paeps 150e948693eSPhilip Paeps void 151e948693eSPhilip Paeps sfxge_rx_qflush_failed(struct sfxge_rxq *rxq) 152e948693eSPhilip Paeps { 153e948693eSPhilip Paeps 154e948693eSPhilip Paeps rxq->flush_state = SFXGE_FLUSH_FAILED; 155e948693eSPhilip Paeps } 156e948693eSPhilip Paeps 157e948693eSPhilip Paeps static uint8_t toep_key[] = { 158e948693eSPhilip Paeps 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 159e948693eSPhilip Paeps 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 160e948693eSPhilip Paeps 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 161e948693eSPhilip Paeps 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 162e948693eSPhilip Paeps 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 163e948693eSPhilip Paeps }; 164e948693eSPhilip Paeps 165e948693eSPhilip Paeps static void 166e948693eSPhilip Paeps sfxge_rx_post_refill(void *arg) 167e948693eSPhilip Paeps { 168e948693eSPhilip Paeps struct sfxge_rxq *rxq = arg; 169e948693eSPhilip Paeps struct sfxge_softc *sc; 170e948693eSPhilip Paeps unsigned int index; 171e948693eSPhilip Paeps struct sfxge_evq *evq; 172e948693eSPhilip Paeps uint16_t magic; 173e948693eSPhilip Paeps 174e948693eSPhilip Paeps sc = rxq->sc; 175e948693eSPhilip Paeps index = rxq->index; 176e948693eSPhilip Paeps evq = sc->evq[index]; 177e948693eSPhilip Paeps 178e948693eSPhilip Paeps magic = SFXGE_MAGIC_RX_QREFILL | index; 179e948693eSPhilip Paeps 180e948693eSPhilip Paeps /* This is guaranteed due to the start/stop order of rx and ev */ 181e948693eSPhilip Paeps KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 182e948693eSPhilip Paeps ("evq not started")); 183e948693eSPhilip Paeps KASSERT(rxq->init_state == SFXGE_RXQ_STARTED, 184e948693eSPhilip Paeps ("rxq not started")); 185e948693eSPhilip Paeps efx_ev_qpost(evq->common, magic); 186e948693eSPhilip Paeps } 187e948693eSPhilip Paeps 188e948693eSPhilip Paeps static void 189e948693eSPhilip Paeps sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) 190e948693eSPhilip Paeps { 191e948693eSPhilip Paeps /* Initially retry after 100 ms, but back off in case of 192e948693eSPhilip Paeps * repeated failures as we probably have to wait for the 193e948693eSPhilip Paeps * administrator to raise the pool limit. */ 194e948693eSPhilip Paeps if (retrying) 195e948693eSPhilip Paeps rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz); 196e948693eSPhilip Paeps else 197e948693eSPhilip Paeps rxq->refill_delay = hz / 10; 198e948693eSPhilip Paeps 199e948693eSPhilip Paeps callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay, 200e948693eSPhilip Paeps sfxge_rx_post_refill, rxq); 201e948693eSPhilip Paeps } 202e948693eSPhilip Paeps 2030b28bbdcSAndrew Rybchenko static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) 204e948693eSPhilip Paeps { 205e948693eSPhilip Paeps struct mb_args args; 206e948693eSPhilip Paeps struct mbuf *m; 207e948693eSPhilip Paeps 208e948693eSPhilip Paeps /* Allocate mbuf structure */ 209e948693eSPhilip Paeps args.flags = M_PKTHDR; 210e948693eSPhilip Paeps args.type = MT_DATA; 211e275c0d3SGleb Smirnoff m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT); 212e948693eSPhilip Paeps 213e948693eSPhilip Paeps /* Allocate (and attach) packet buffer */ 214b7b0edd1SGeorge V. Neville-Neil if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) { 215e948693eSPhilip Paeps uma_zfree(zone_mbuf, m); 216e948693eSPhilip Paeps m = NULL; 217e948693eSPhilip Paeps } 218e948693eSPhilip Paeps 219b7b0edd1SGeorge V. Neville-Neil return (m); 220e948693eSPhilip Paeps } 221e948693eSPhilip Paeps 222e948693eSPhilip Paeps #define SFXGE_REFILL_BATCH 64 223e948693eSPhilip Paeps 224e948693eSPhilip Paeps static void 225e948693eSPhilip Paeps sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) 226e948693eSPhilip Paeps { 227e948693eSPhilip Paeps struct sfxge_softc *sc; 228e948693eSPhilip Paeps unsigned int index; 229e948693eSPhilip Paeps struct sfxge_evq *evq; 230e948693eSPhilip Paeps unsigned int batch; 231e948693eSPhilip Paeps unsigned int rxfill; 232e948693eSPhilip Paeps unsigned int mblksize; 233e948693eSPhilip Paeps int ntodo; 234e948693eSPhilip Paeps efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; 235e948693eSPhilip Paeps 236e948693eSPhilip Paeps sc = rxq->sc; 237e948693eSPhilip Paeps index = rxq->index; 238e948693eSPhilip Paeps evq = sc->evq[index]; 239e948693eSPhilip Paeps 240e948693eSPhilip Paeps prefetch_read_many(sc->enp); 241e948693eSPhilip Paeps prefetch_read_many(rxq->common); 242e948693eSPhilip Paeps 243763cab71SAndrew Rybchenko SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 244e948693eSPhilip Paeps 245851128b8SAndrew Rybchenko if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 246e948693eSPhilip Paeps return; 247e948693eSPhilip Paeps 248e948693eSPhilip Paeps rxfill = rxq->added - rxq->completed; 249385b1d8eSGeorge V. Neville-Neil KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries), 250385b1d8eSGeorge V. Neville-Neil ("rxfill > EFX_RXQ_LIMIT(rxq->entries)")); 251385b1d8eSGeorge V. Neville-Neil ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target); 252385b1d8eSGeorge V. Neville-Neil KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries), 253385b1d8eSGeorge V. Neville-Neil ("ntodo > EFX_RQX_LIMIT(rxq->entries)")); 254e948693eSPhilip Paeps 255e948693eSPhilip Paeps if (ntodo == 0) 256e948693eSPhilip Paeps return; 257e948693eSPhilip Paeps 258e948693eSPhilip Paeps batch = 0; 259e948693eSPhilip Paeps mblksize = sc->rx_buffer_size; 260e948693eSPhilip Paeps while (ntodo-- > 0) { 261e948693eSPhilip Paeps unsigned int id; 262e948693eSPhilip Paeps struct sfxge_rx_sw_desc *rx_desc; 263e948693eSPhilip Paeps bus_dma_segment_t seg; 264e948693eSPhilip Paeps struct mbuf *m; 265e948693eSPhilip Paeps 266385b1d8eSGeorge V. Neville-Neil id = (rxq->added + batch) & rxq->ptr_mask; 267e948693eSPhilip Paeps rx_desc = &rxq->queue[id]; 268e948693eSPhilip Paeps KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); 269e948693eSPhilip Paeps 270e948693eSPhilip Paeps rx_desc->flags = EFX_DISCARD; 271e948693eSPhilip Paeps m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); 272e948693eSPhilip Paeps if (m == NULL) 273e948693eSPhilip Paeps break; 274e948693eSPhilip Paeps sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); 275e948693eSPhilip Paeps addr[batch++] = seg.ds_addr; 276e948693eSPhilip Paeps 277e948693eSPhilip Paeps if (batch == SFXGE_REFILL_BATCH) { 278e948693eSPhilip Paeps efx_rx_qpost(rxq->common, addr, mblksize, batch, 279e948693eSPhilip Paeps rxq->completed, rxq->added); 280e948693eSPhilip Paeps rxq->added += batch; 281e948693eSPhilip Paeps batch = 0; 282e948693eSPhilip Paeps } 283e948693eSPhilip Paeps } 284e948693eSPhilip Paeps 285e948693eSPhilip Paeps if (ntodo != 0) 286e948693eSPhilip Paeps sfxge_rx_schedule_refill(rxq, retrying); 287e948693eSPhilip Paeps 288e948693eSPhilip Paeps if (batch != 0) { 289e948693eSPhilip Paeps efx_rx_qpost(rxq->common, addr, mblksize, batch, 290e948693eSPhilip Paeps rxq->completed, rxq->added); 291e948693eSPhilip Paeps rxq->added += batch; 292e948693eSPhilip Paeps } 293e948693eSPhilip Paeps 294e948693eSPhilip Paeps /* Make the descriptors visible to the hardware */ 295e948693eSPhilip Paeps bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, 296e948693eSPhilip Paeps BUS_DMASYNC_PREWRITE); 297e948693eSPhilip Paeps 298e948693eSPhilip Paeps efx_rx_qpush(rxq->common, rxq->added); 299e948693eSPhilip Paeps } 300e948693eSPhilip Paeps 301e948693eSPhilip Paeps void 302e948693eSPhilip Paeps sfxge_rx_qrefill(struct sfxge_rxq *rxq) 303e948693eSPhilip Paeps { 304e948693eSPhilip Paeps 305851128b8SAndrew Rybchenko if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 306e948693eSPhilip Paeps return; 307e948693eSPhilip Paeps 308e948693eSPhilip Paeps /* Make sure the queue is full */ 309385b1d8eSGeorge V. Neville-Neil sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE); 310e948693eSPhilip Paeps } 311e948693eSPhilip Paeps 312e948693eSPhilip Paeps static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m) 313e948693eSPhilip Paeps { 314e948693eSPhilip Paeps struct ifnet *ifp = sc->ifnet; 315e948693eSPhilip Paeps 316e948693eSPhilip Paeps m->m_pkthdr.rcvif = ifp; 317e948693eSPhilip Paeps m->m_pkthdr.csum_data = 0xffff; 318e948693eSPhilip Paeps ifp->if_input(ifp, m); 319e948693eSPhilip Paeps } 320e948693eSPhilip Paeps 321e948693eSPhilip Paeps static void 322e948693eSPhilip Paeps sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc) 323e948693eSPhilip Paeps { 324e948693eSPhilip Paeps struct mbuf *m = rx_desc->mbuf; 325588644a4SAndrew Rybchenko int flags = rx_desc->flags; 326e948693eSPhilip Paeps int csum_flags; 327e948693eSPhilip Paeps 328e948693eSPhilip Paeps /* Convert checksum flags */ 329588644a4SAndrew Rybchenko csum_flags = (flags & EFX_CKSUM_IPV4) ? 330e948693eSPhilip Paeps (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0; 331588644a4SAndrew Rybchenko if (flags & EFX_CKSUM_TCPUDP) 332e948693eSPhilip Paeps csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 333e948693eSPhilip Paeps 334588644a4SAndrew Rybchenko if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) { 335e948693eSPhilip Paeps m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ, 336e948693eSPhilip Paeps mtod(m, uint8_t *)); 337dcf08586SAndrew Rybchenko /* The hash covers a 4-tuple for TCP only */ 338dcf08586SAndrew Rybchenko M_HASHTYPE_SET(m, 339588644a4SAndrew Rybchenko (flags & EFX_PKT_IPV4) ? 340588644a4SAndrew Rybchenko ((flags & EFX_PKT_TCP) ? 341dcf08586SAndrew Rybchenko M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_IPV4) : 342588644a4SAndrew Rybchenko ((flags & EFX_PKT_TCP) ? 343dcf08586SAndrew Rybchenko M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_IPV6)); 344e948693eSPhilip Paeps } 345e948693eSPhilip Paeps m->m_data += sc->rx_prefix_size; 346e948693eSPhilip Paeps m->m_len = rx_desc->size - sc->rx_prefix_size; 347e948693eSPhilip Paeps m->m_pkthdr.len = m->m_len; 348e948693eSPhilip Paeps m->m_pkthdr.csum_flags = csum_flags; 349e948693eSPhilip Paeps __sfxge_rx_deliver(sc, rx_desc->mbuf); 350e948693eSPhilip Paeps 351e948693eSPhilip Paeps rx_desc->flags = EFX_DISCARD; 352e948693eSPhilip Paeps rx_desc->mbuf = NULL; 353e948693eSPhilip Paeps } 354e948693eSPhilip Paeps 35518daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO 35618daa0eeSAndrew Rybchenko 357e948693eSPhilip Paeps static void 358e948693eSPhilip Paeps sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c) 359e948693eSPhilip Paeps { 360e948693eSPhilip Paeps struct sfxge_softc *sc = st->sc; 361e948693eSPhilip Paeps struct mbuf *m = c->mbuf; 362e948693eSPhilip Paeps struct tcphdr *c_th; 363e948693eSPhilip Paeps int csum_flags; 364e948693eSPhilip Paeps 365e948693eSPhilip Paeps KASSERT(m, ("no mbuf to deliver")); 366e948693eSPhilip Paeps 367e948693eSPhilip Paeps ++st->n_bursts; 368e948693eSPhilip Paeps 369e948693eSPhilip Paeps /* Finish off packet munging and recalculate IP header checksum. */ 370e948693eSPhilip Paeps if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 371e948693eSPhilip Paeps struct ip *iph = c->nh; 372e948693eSPhilip Paeps iph->ip_len = htons(iph->ip_len); 373e948693eSPhilip Paeps iph->ip_sum = 0; 374e948693eSPhilip Paeps iph->ip_sum = in_cksum_hdr(iph); 375e948693eSPhilip Paeps c_th = (struct tcphdr *)(iph + 1); 376e948693eSPhilip Paeps csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | 377e948693eSPhilip Paeps CSUM_IP_CHECKED | CSUM_IP_VALID); 378e948693eSPhilip Paeps } else { 379e948693eSPhilip Paeps struct ip6_hdr *iph = c->nh; 380e948693eSPhilip Paeps iph->ip6_plen = htons(iph->ip6_plen); 381e948693eSPhilip Paeps c_th = (struct tcphdr *)(iph + 1); 382e948693eSPhilip Paeps csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 383e948693eSPhilip Paeps } 384e948693eSPhilip Paeps 385e948693eSPhilip Paeps c_th->th_win = c->th_last->th_win; 386e948693eSPhilip Paeps c_th->th_ack = c->th_last->th_ack; 387e948693eSPhilip Paeps if (c_th->th_off == c->th_last->th_off) { 388e948693eSPhilip Paeps /* Copy TCP options (take care to avoid going negative). */ 389e948693eSPhilip Paeps int optlen = ((c_th->th_off - 5) & 0xf) << 2u; 390e948693eSPhilip Paeps memcpy(c_th + 1, c->th_last + 1, optlen); 391e948693eSPhilip Paeps } 392e948693eSPhilip Paeps 393e948693eSPhilip Paeps m->m_pkthdr.flowid = c->conn_hash; 394dcf08586SAndrew Rybchenko M_HASHTYPE_SET(m, 395dcf08586SAndrew Rybchenko SFXGE_LRO_CONN_IS_TCPIPV4(c) ? 396dcf08586SAndrew Rybchenko M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_TCP_IPV6); 397a411fe4eSAndrew Rybchenko 398e948693eSPhilip Paeps m->m_pkthdr.csum_flags = csum_flags; 399e948693eSPhilip Paeps __sfxge_rx_deliver(sc, m); 400e948693eSPhilip Paeps 401e948693eSPhilip Paeps c->mbuf = NULL; 402e948693eSPhilip Paeps c->delivered = 1; 403e948693eSPhilip Paeps } 404e948693eSPhilip Paeps 405e948693eSPhilip Paeps /* Drop the given connection, and add it to the free list. */ 406e948693eSPhilip Paeps static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c) 407e948693eSPhilip Paeps { 408e948693eSPhilip Paeps unsigned bucket; 409e948693eSPhilip Paeps 410e948693eSPhilip Paeps KASSERT(!c->mbuf, ("found orphaned mbuf")); 411e948693eSPhilip Paeps 412b7b0edd1SGeorge V. Neville-Neil if (c->next_buf.mbuf != NULL) { 413e948693eSPhilip Paeps sfxge_rx_deliver(rxq->sc, &c->next_buf); 414e948693eSPhilip Paeps LIST_REMOVE(c, active_link); 415e948693eSPhilip Paeps } 416e948693eSPhilip Paeps 417e948693eSPhilip Paeps bucket = c->conn_hash & rxq->lro.conns_mask; 418e948693eSPhilip Paeps KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong")); 419e948693eSPhilip Paeps --rxq->lro.conns_n[bucket]; 420e948693eSPhilip Paeps TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link); 421e948693eSPhilip Paeps TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link); 422e948693eSPhilip Paeps } 423e948693eSPhilip Paeps 424e948693eSPhilip Paeps /* Stop tracking connections that have gone idle in order to keep hash 425e948693eSPhilip Paeps * chains short. 426e948693eSPhilip Paeps */ 427e948693eSPhilip Paeps static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now) 428e948693eSPhilip Paeps { 429e948693eSPhilip Paeps struct sfxge_lro_conn *c; 430e948693eSPhilip Paeps unsigned i; 431e948693eSPhilip Paeps 432e948693eSPhilip Paeps KASSERT(LIST_EMPTY(&rxq->lro.active_conns), 433e948693eSPhilip Paeps ("found active connections")); 434e948693eSPhilip Paeps 435e948693eSPhilip Paeps rxq->lro.last_purge_ticks = now; 436e948693eSPhilip Paeps for (i = 0; i <= rxq->lro.conns_mask; ++i) { 437e948693eSPhilip Paeps if (TAILQ_EMPTY(&rxq->lro.conns[i])) 438e948693eSPhilip Paeps continue; 439e948693eSPhilip Paeps 440e948693eSPhilip Paeps c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq); 441e948693eSPhilip Paeps if (now - c->last_pkt_ticks > lro_idle_ticks) { 442e948693eSPhilip Paeps ++rxq->lro.n_drop_idle; 443e948693eSPhilip Paeps sfxge_lro_drop(rxq, c); 444e948693eSPhilip Paeps } 445e948693eSPhilip Paeps } 446e948693eSPhilip Paeps } 447e948693eSPhilip Paeps 448e948693eSPhilip Paeps static void 449e948693eSPhilip Paeps sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 450e948693eSPhilip Paeps struct mbuf *mbuf, struct tcphdr *th) 451e948693eSPhilip Paeps { 452e948693eSPhilip Paeps struct tcphdr *c_th; 453e948693eSPhilip Paeps 454e948693eSPhilip Paeps /* Tack the new mbuf onto the chain. */ 455e948693eSPhilip Paeps KASSERT(!mbuf->m_next, ("mbuf already chained")); 456e948693eSPhilip Paeps c->mbuf_tail->m_next = mbuf; 457e948693eSPhilip Paeps c->mbuf_tail = mbuf; 458e948693eSPhilip Paeps 459e948693eSPhilip Paeps /* Increase length appropriately */ 460e948693eSPhilip Paeps c->mbuf->m_pkthdr.len += mbuf->m_len; 461e948693eSPhilip Paeps 462e948693eSPhilip Paeps /* Update the connection state flags */ 463e948693eSPhilip Paeps if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 464e948693eSPhilip Paeps struct ip *iph = c->nh; 465e948693eSPhilip Paeps iph->ip_len += mbuf->m_len; 466e948693eSPhilip Paeps c_th = (struct tcphdr *)(iph + 1); 467e948693eSPhilip Paeps } else { 468e948693eSPhilip Paeps struct ip6_hdr *iph = c->nh; 469e948693eSPhilip Paeps iph->ip6_plen += mbuf->m_len; 470e948693eSPhilip Paeps c_th = (struct tcphdr *)(iph + 1); 471e948693eSPhilip Paeps } 472e948693eSPhilip Paeps c_th->th_flags |= (th->th_flags & TH_PUSH); 473e948693eSPhilip Paeps c->th_last = th; 474e948693eSPhilip Paeps ++st->n_merges; 475e948693eSPhilip Paeps 476e948693eSPhilip Paeps /* Pass packet up now if another segment could overflow the IP 477e948693eSPhilip Paeps * length. 478e948693eSPhilip Paeps */ 479e948693eSPhilip Paeps if (c->mbuf->m_pkthdr.len > 65536 - 9200) 480e948693eSPhilip Paeps sfxge_lro_deliver(st, c); 481e948693eSPhilip Paeps } 482e948693eSPhilip Paeps 483e948693eSPhilip Paeps static void 484e948693eSPhilip Paeps sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 485e948693eSPhilip Paeps struct mbuf *mbuf, void *nh, struct tcphdr *th) 486e948693eSPhilip Paeps { 487e948693eSPhilip Paeps /* Start the chain */ 488e948693eSPhilip Paeps c->mbuf = mbuf; 489e948693eSPhilip Paeps c->mbuf_tail = c->mbuf; 490e948693eSPhilip Paeps c->nh = nh; 491e948693eSPhilip Paeps c->th_last = th; 492e948693eSPhilip Paeps 493e948693eSPhilip Paeps mbuf->m_pkthdr.len = mbuf->m_len; 494e948693eSPhilip Paeps 495e948693eSPhilip Paeps /* Mangle header fields for later processing */ 496e948693eSPhilip Paeps if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 497e948693eSPhilip Paeps struct ip *iph = nh; 498e948693eSPhilip Paeps iph->ip_len = ntohs(iph->ip_len); 499e948693eSPhilip Paeps } else { 500e948693eSPhilip Paeps struct ip6_hdr *iph = nh; 501e948693eSPhilip Paeps iph->ip6_plen = ntohs(iph->ip6_plen); 502e948693eSPhilip Paeps } 503e948693eSPhilip Paeps } 504e948693eSPhilip Paeps 505e948693eSPhilip Paeps /* Try to merge or otherwise hold or deliver (as appropriate) the 506e948693eSPhilip Paeps * packet buffered for this connection (c->next_buf). Return a flag 507e948693eSPhilip Paeps * indicating whether the connection is still active for LRO purposes. 508e948693eSPhilip Paeps */ 509e948693eSPhilip Paeps static int 510e948693eSPhilip Paeps sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c) 511e948693eSPhilip Paeps { 512e948693eSPhilip Paeps struct sfxge_rx_sw_desc *rx_buf = &c->next_buf; 513e948693eSPhilip Paeps char *eh = c->next_eh; 514e948693eSPhilip Paeps int data_length, hdr_length, dont_merge; 515e948693eSPhilip Paeps unsigned th_seq, pkt_length; 516e948693eSPhilip Paeps struct tcphdr *th; 517e948693eSPhilip Paeps unsigned now; 518e948693eSPhilip Paeps 519e948693eSPhilip Paeps if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 520e948693eSPhilip Paeps struct ip *iph = c->next_nh; 521e948693eSPhilip Paeps th = (struct tcphdr *)(iph + 1); 522e948693eSPhilip Paeps pkt_length = ntohs(iph->ip_len) + (char *) iph - eh; 523e948693eSPhilip Paeps } else { 524e948693eSPhilip Paeps struct ip6_hdr *iph = c->next_nh; 525e948693eSPhilip Paeps th = (struct tcphdr *)(iph + 1); 526e948693eSPhilip Paeps pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh; 527e948693eSPhilip Paeps } 528e948693eSPhilip Paeps 529e948693eSPhilip Paeps hdr_length = (char *) th + th->th_off * 4 - eh; 530e948693eSPhilip Paeps data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) - 531e948693eSPhilip Paeps hdr_length); 532e948693eSPhilip Paeps th_seq = ntohl(th->th_seq); 533e948693eSPhilip Paeps dont_merge = ((data_length <= 0) 534e948693eSPhilip Paeps | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN))); 535e948693eSPhilip Paeps 536e948693eSPhilip Paeps /* Check for options other than aligned timestamp. */ 537e948693eSPhilip Paeps if (th->th_off != 5) { 538e948693eSPhilip Paeps const uint32_t *opt_ptr = (const uint32_t *) (th + 1); 539e948693eSPhilip Paeps if (th->th_off == 8 && 540e948693eSPhilip Paeps opt_ptr[0] == ntohl((TCPOPT_NOP << 24) | 541e948693eSPhilip Paeps (TCPOPT_NOP << 16) | 542e948693eSPhilip Paeps (TCPOPT_TIMESTAMP << 8) | 543e948693eSPhilip Paeps TCPOLEN_TIMESTAMP)) { 544e948693eSPhilip Paeps /* timestamp option -- okay */ 545e948693eSPhilip Paeps } else { 546e948693eSPhilip Paeps dont_merge = 1; 547e948693eSPhilip Paeps } 548e948693eSPhilip Paeps } 549e948693eSPhilip Paeps 550e948693eSPhilip Paeps if (__predict_false(th_seq != c->next_seq)) { 551e948693eSPhilip Paeps /* Out-of-order, so start counting again. */ 552b7b0edd1SGeorge V. Neville-Neil if (c->mbuf != NULL) 553e948693eSPhilip Paeps sfxge_lro_deliver(&rxq->lro, c); 554e948693eSPhilip Paeps c->n_in_order_pkts -= lro_loss_packets; 555e948693eSPhilip Paeps c->next_seq = th_seq + data_length; 556e948693eSPhilip Paeps ++rxq->lro.n_misorder; 557e948693eSPhilip Paeps goto deliver_buf_out; 558e948693eSPhilip Paeps } 559e948693eSPhilip Paeps c->next_seq = th_seq + data_length; 560e948693eSPhilip Paeps 561e948693eSPhilip Paeps now = ticks; 562e948693eSPhilip Paeps if (now - c->last_pkt_ticks > lro_idle_ticks) { 563e948693eSPhilip Paeps ++rxq->lro.n_drop_idle; 564b7b0edd1SGeorge V. Neville-Neil if (c->mbuf != NULL) 565e948693eSPhilip Paeps sfxge_lro_deliver(&rxq->lro, c); 566e948693eSPhilip Paeps sfxge_lro_drop(rxq, c); 567b7b0edd1SGeorge V. Neville-Neil return (0); 568e948693eSPhilip Paeps } 569e948693eSPhilip Paeps c->last_pkt_ticks = ticks; 570e948693eSPhilip Paeps 571e948693eSPhilip Paeps if (c->n_in_order_pkts < lro_slow_start_packets) { 572e948693eSPhilip Paeps /* May be in slow-start, so don't merge. */ 573e948693eSPhilip Paeps ++rxq->lro.n_slow_start; 574e948693eSPhilip Paeps ++c->n_in_order_pkts; 575e948693eSPhilip Paeps goto deliver_buf_out; 576e948693eSPhilip Paeps } 577e948693eSPhilip Paeps 578e948693eSPhilip Paeps if (__predict_false(dont_merge)) { 579b7b0edd1SGeorge V. Neville-Neil if (c->mbuf != NULL) 580e948693eSPhilip Paeps sfxge_lro_deliver(&rxq->lro, c); 581e948693eSPhilip Paeps if (th->th_flags & (TH_FIN | TH_RST)) { 582e948693eSPhilip Paeps ++rxq->lro.n_drop_closed; 583e948693eSPhilip Paeps sfxge_lro_drop(rxq, c); 584b7b0edd1SGeorge V. Neville-Neil return (0); 585e948693eSPhilip Paeps } 586e948693eSPhilip Paeps goto deliver_buf_out; 587e948693eSPhilip Paeps } 588e948693eSPhilip Paeps 589e948693eSPhilip Paeps rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size; 590e948693eSPhilip Paeps 591e948693eSPhilip Paeps if (__predict_true(c->mbuf != NULL)) { 592e948693eSPhilip Paeps /* Remove headers and any padding */ 593e948693eSPhilip Paeps rx_buf->mbuf->m_data += hdr_length; 594e948693eSPhilip Paeps rx_buf->mbuf->m_len = data_length; 595e948693eSPhilip Paeps 596e948693eSPhilip Paeps sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th); 597e948693eSPhilip Paeps } else { 598e948693eSPhilip Paeps /* Remove any padding */ 599e948693eSPhilip Paeps rx_buf->mbuf->m_len = pkt_length; 600e948693eSPhilip Paeps 601e948693eSPhilip Paeps sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th); 602e948693eSPhilip Paeps } 603e948693eSPhilip Paeps 604e948693eSPhilip Paeps rx_buf->mbuf = NULL; 605b7b0edd1SGeorge V. Neville-Neil return (1); 606e948693eSPhilip Paeps 607e948693eSPhilip Paeps deliver_buf_out: 608e948693eSPhilip Paeps sfxge_rx_deliver(rxq->sc, rx_buf); 609b7b0edd1SGeorge V. Neville-Neil return (1); 610e948693eSPhilip Paeps } 611e948693eSPhilip Paeps 612e948693eSPhilip Paeps static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash, 613e948693eSPhilip Paeps uint16_t l2_id, void *nh, struct tcphdr *th) 614e948693eSPhilip Paeps { 615e948693eSPhilip Paeps unsigned bucket = conn_hash & st->conns_mask; 616e948693eSPhilip Paeps struct sfxge_lro_conn *c; 617e948693eSPhilip Paeps 618e948693eSPhilip Paeps if (st->conns_n[bucket] >= lro_chain_max) { 619e948693eSPhilip Paeps ++st->n_too_many; 620e948693eSPhilip Paeps return; 621e948693eSPhilip Paeps } 622e948693eSPhilip Paeps 623e948693eSPhilip Paeps if (!TAILQ_EMPTY(&st->free_conns)) { 624e948693eSPhilip Paeps c = TAILQ_FIRST(&st->free_conns); 625e948693eSPhilip Paeps TAILQ_REMOVE(&st->free_conns, c, link); 626e948693eSPhilip Paeps } else { 627e275c0d3SGleb Smirnoff c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT); 628e948693eSPhilip Paeps if (c == NULL) 629e948693eSPhilip Paeps return; 630e948693eSPhilip Paeps c->mbuf = NULL; 631e948693eSPhilip Paeps c->next_buf.mbuf = NULL; 632e948693eSPhilip Paeps } 633e948693eSPhilip Paeps 634e948693eSPhilip Paeps /* Create the connection tracking data */ 635e948693eSPhilip Paeps ++st->conns_n[bucket]; 636e948693eSPhilip Paeps TAILQ_INSERT_HEAD(&st->conns[bucket], c, link); 637e948693eSPhilip Paeps c->l2_id = l2_id; 638e948693eSPhilip Paeps c->conn_hash = conn_hash; 639e948693eSPhilip Paeps c->source = th->th_sport; 640e948693eSPhilip Paeps c->dest = th->th_dport; 641e948693eSPhilip Paeps c->n_in_order_pkts = 0; 642e948693eSPhilip Paeps c->last_pkt_ticks = *(volatile int *)&ticks; 643e948693eSPhilip Paeps c->delivered = 0; 644e948693eSPhilip Paeps ++st->n_new_stream; 645e948693eSPhilip Paeps /* NB. We don't initialise c->next_seq, and it doesn't matter what 646e948693eSPhilip Paeps * value it has. Most likely the next packet received for this 647e948693eSPhilip Paeps * connection will not match -- no harm done. 648e948693eSPhilip Paeps */ 649e948693eSPhilip Paeps } 650e948693eSPhilip Paeps 651e948693eSPhilip Paeps /* Process mbuf and decide whether to dispatch it to the stack now or 652e948693eSPhilip Paeps * later. 653e948693eSPhilip Paeps */ 654e948693eSPhilip Paeps static void 655e948693eSPhilip Paeps sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf) 656e948693eSPhilip Paeps { 657e948693eSPhilip Paeps struct sfxge_softc *sc = rxq->sc; 658e948693eSPhilip Paeps struct mbuf *m = rx_buf->mbuf; 659e948693eSPhilip Paeps struct ether_header *eh; 660e948693eSPhilip Paeps struct sfxge_lro_conn *c; 661e948693eSPhilip Paeps uint16_t l2_id; 662e948693eSPhilip Paeps uint16_t l3_proto; 663e948693eSPhilip Paeps void *nh; 664e948693eSPhilip Paeps struct tcphdr *th; 665e948693eSPhilip Paeps uint32_t conn_hash; 666e948693eSPhilip Paeps unsigned bucket; 667e948693eSPhilip Paeps 668e948693eSPhilip Paeps /* Get the hardware hash */ 669e948693eSPhilip Paeps conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ, 670e948693eSPhilip Paeps mtod(m, uint8_t *)); 671e948693eSPhilip Paeps 672e948693eSPhilip Paeps eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size); 673e948693eSPhilip Paeps if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 674e948693eSPhilip Paeps struct ether_vlan_header *veh = (struct ether_vlan_header *)eh; 675e948693eSPhilip Paeps l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) | 676e948693eSPhilip Paeps SFXGE_LRO_L2_ID_VLAN; 677e948693eSPhilip Paeps l3_proto = veh->evl_proto; 678e948693eSPhilip Paeps nh = veh + 1; 679e948693eSPhilip Paeps } else { 680e948693eSPhilip Paeps l2_id = 0; 681e948693eSPhilip Paeps l3_proto = eh->ether_type; 682e948693eSPhilip Paeps nh = eh + 1; 683e948693eSPhilip Paeps } 684e948693eSPhilip Paeps 685e948693eSPhilip Paeps /* Check whether this is a suitable packet (unfragmented 686e948693eSPhilip Paeps * TCP/IPv4 or TCP/IPv6). If so, find the TCP header and 687e948693eSPhilip Paeps * length, and compute a hash if necessary. If not, return. 688e948693eSPhilip Paeps */ 689e948693eSPhilip Paeps if (l3_proto == htons(ETHERTYPE_IP)) { 690e948693eSPhilip Paeps struct ip *iph = nh; 6913b3390c1SAndrew Rybchenko 6923b3390c1SAndrew Rybchenko KASSERT(iph->ip_p == IPPROTO_TCP, 6933b3390c1SAndrew Rybchenko ("IPv4 protocol is not TCP, but packet marker is set")); 6943b3390c1SAndrew Rybchenko if ((iph->ip_hl - (sizeof(*iph) >> 2u)) | 695e948693eSPhilip Paeps (iph->ip_off & htons(IP_MF | IP_OFFMASK))) 696e948693eSPhilip Paeps goto deliver_now; 697e948693eSPhilip Paeps th = (struct tcphdr *)(iph + 1); 698e948693eSPhilip Paeps } else if (l3_proto == htons(ETHERTYPE_IPV6)) { 699e948693eSPhilip Paeps struct ip6_hdr *iph = nh; 7003b3390c1SAndrew Rybchenko 7013b3390c1SAndrew Rybchenko KASSERT(iph->ip6_nxt == IPPROTO_TCP, 7023b3390c1SAndrew Rybchenko ("IPv6 next header is not TCP, but packet marker is set")); 703e948693eSPhilip Paeps l2_id |= SFXGE_LRO_L2_ID_IPV6; 704e948693eSPhilip Paeps th = (struct tcphdr *)(iph + 1); 705e948693eSPhilip Paeps } else { 706e948693eSPhilip Paeps goto deliver_now; 707e948693eSPhilip Paeps } 708e948693eSPhilip Paeps 709e948693eSPhilip Paeps bucket = conn_hash & rxq->lro.conns_mask; 710e948693eSPhilip Paeps 711e948693eSPhilip Paeps TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) { 712e948693eSPhilip Paeps if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash)) 713e948693eSPhilip Paeps continue; 714e948693eSPhilip Paeps if ((c->source - th->th_sport) | (c->dest - th->th_dport)) 715e948693eSPhilip Paeps continue; 716b7b0edd1SGeorge V. Neville-Neil if (c->mbuf != NULL) { 717e948693eSPhilip Paeps if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 718e948693eSPhilip Paeps struct ip *c_iph, *iph = nh; 719e948693eSPhilip Paeps c_iph = c->nh; 720e948693eSPhilip Paeps if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) | 721e948693eSPhilip Paeps (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr)) 722e948693eSPhilip Paeps continue; 723e948693eSPhilip Paeps } else { 724e948693eSPhilip Paeps struct ip6_hdr *c_iph, *iph = nh; 725e948693eSPhilip Paeps c_iph = c->nh; 726e948693eSPhilip Paeps if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) | 727e948693eSPhilip Paeps ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst)) 728e948693eSPhilip Paeps continue; 729e948693eSPhilip Paeps } 730e948693eSPhilip Paeps } 731e948693eSPhilip Paeps 732e948693eSPhilip Paeps /* Re-insert at head of list to reduce lookup time. */ 733e948693eSPhilip Paeps TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link); 734e948693eSPhilip Paeps TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link); 735e948693eSPhilip Paeps 736b7b0edd1SGeorge V. Neville-Neil if (c->next_buf.mbuf != NULL) { 737e948693eSPhilip Paeps if (!sfxge_lro_try_merge(rxq, c)) 738e948693eSPhilip Paeps goto deliver_now; 739e948693eSPhilip Paeps } else { 740e948693eSPhilip Paeps LIST_INSERT_HEAD(&rxq->lro.active_conns, c, 741e948693eSPhilip Paeps active_link); 742e948693eSPhilip Paeps } 743e948693eSPhilip Paeps c->next_buf = *rx_buf; 744e948693eSPhilip Paeps c->next_eh = eh; 745e948693eSPhilip Paeps c->next_nh = nh; 746e948693eSPhilip Paeps 747e948693eSPhilip Paeps rx_buf->mbuf = NULL; 748e948693eSPhilip Paeps rx_buf->flags = EFX_DISCARD; 749e948693eSPhilip Paeps return; 750e948693eSPhilip Paeps } 751e948693eSPhilip Paeps 752e948693eSPhilip Paeps sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th); 753e948693eSPhilip Paeps deliver_now: 754e948693eSPhilip Paeps sfxge_rx_deliver(sc, rx_buf); 755e948693eSPhilip Paeps } 756e948693eSPhilip Paeps 757e948693eSPhilip Paeps static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq) 758e948693eSPhilip Paeps { 759e948693eSPhilip Paeps struct sfxge_lro_state *st = &rxq->lro; 760e948693eSPhilip Paeps struct sfxge_lro_conn *c; 761e948693eSPhilip Paeps unsigned t; 762e948693eSPhilip Paeps 763e948693eSPhilip Paeps while (!LIST_EMPTY(&st->active_conns)) { 764e948693eSPhilip Paeps c = LIST_FIRST(&st->active_conns); 765b7b0edd1SGeorge V. Neville-Neil if (!c->delivered && c->mbuf != NULL) 766e948693eSPhilip Paeps sfxge_lro_deliver(st, c); 767e948693eSPhilip Paeps if (sfxge_lro_try_merge(rxq, c)) { 768b7b0edd1SGeorge V. Neville-Neil if (c->mbuf != NULL) 769e948693eSPhilip Paeps sfxge_lro_deliver(st, c); 770e948693eSPhilip Paeps LIST_REMOVE(c, active_link); 771e948693eSPhilip Paeps } 772e948693eSPhilip Paeps c->delivered = 0; 773e948693eSPhilip Paeps } 774e948693eSPhilip Paeps 775e948693eSPhilip Paeps t = *(volatile int *)&ticks; 776e948693eSPhilip Paeps if (__predict_false(t != st->last_purge_ticks)) 777e948693eSPhilip Paeps sfxge_lro_purge_idle(rxq, t); 778e948693eSPhilip Paeps } 779e948693eSPhilip Paeps 78018daa0eeSAndrew Rybchenko #else /* !SFXGE_LRO */ 78118daa0eeSAndrew Rybchenko 78218daa0eeSAndrew Rybchenko static void 78318daa0eeSAndrew Rybchenko sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf) 78418daa0eeSAndrew Rybchenko { 78518daa0eeSAndrew Rybchenko } 78618daa0eeSAndrew Rybchenko 78718daa0eeSAndrew Rybchenko static void 78818daa0eeSAndrew Rybchenko sfxge_lro_end_of_burst(struct sfxge_rxq *rxq) 78918daa0eeSAndrew Rybchenko { 79018daa0eeSAndrew Rybchenko } 79118daa0eeSAndrew Rybchenko 79218daa0eeSAndrew Rybchenko #endif /* SFXGE_LRO */ 79318daa0eeSAndrew Rybchenko 794e948693eSPhilip Paeps void 795e948693eSPhilip Paeps sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop) 796e948693eSPhilip Paeps { 797e948693eSPhilip Paeps struct sfxge_softc *sc = rxq->sc; 798*b5bae9f4SAndrew Rybchenko int if_capenable = sc->ifnet->if_capenable; 799*b5bae9f4SAndrew Rybchenko int lro_enabled = if_capenable & IFCAP_LRO; 800e948693eSPhilip Paeps unsigned int index; 801e948693eSPhilip Paeps struct sfxge_evq *evq; 802e948693eSPhilip Paeps unsigned int completed; 803e948693eSPhilip Paeps unsigned int level; 804e948693eSPhilip Paeps struct mbuf *m; 805e948693eSPhilip Paeps struct sfxge_rx_sw_desc *prev = NULL; 806e948693eSPhilip Paeps 807e948693eSPhilip Paeps index = rxq->index; 808e948693eSPhilip Paeps evq = sc->evq[index]; 809e948693eSPhilip Paeps 810763cab71SAndrew Rybchenko SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 811e948693eSPhilip Paeps 812e948693eSPhilip Paeps completed = rxq->completed; 813e948693eSPhilip Paeps while (completed != rxq->pending) { 814e948693eSPhilip Paeps unsigned int id; 815e948693eSPhilip Paeps struct sfxge_rx_sw_desc *rx_desc; 816e948693eSPhilip Paeps 817385b1d8eSGeorge V. Neville-Neil id = completed++ & rxq->ptr_mask; 818e948693eSPhilip Paeps rx_desc = &rxq->queue[id]; 819e948693eSPhilip Paeps m = rx_desc->mbuf; 820e948693eSPhilip Paeps 821851128b8SAndrew Rybchenko if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 822e948693eSPhilip Paeps goto discard; 823e948693eSPhilip Paeps 824e948693eSPhilip Paeps if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD)) 825e948693eSPhilip Paeps goto discard; 826e948693eSPhilip Paeps 827e948693eSPhilip Paeps prefetch_read_many(mtod(m, caddr_t)); 828e948693eSPhilip Paeps 829*b5bae9f4SAndrew Rybchenko switch (rx_desc->flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) { 830*b5bae9f4SAndrew Rybchenko case EFX_PKT_IPV4: 831*b5bae9f4SAndrew Rybchenko if (~if_capenable & IFCAP_RXCSUM) 832*b5bae9f4SAndrew Rybchenko rx_desc->flags &= 833*b5bae9f4SAndrew Rybchenko ~(EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP); 834*b5bae9f4SAndrew Rybchenko break; 835*b5bae9f4SAndrew Rybchenko case EFX_PKT_IPV6: 836*b5bae9f4SAndrew Rybchenko if (~if_capenable & IFCAP_RXCSUM_IPV6) 837*b5bae9f4SAndrew Rybchenko rx_desc->flags &= ~EFX_CKSUM_TCPUDP; 838*b5bae9f4SAndrew Rybchenko break; 839*b5bae9f4SAndrew Rybchenko case 0: 840e948693eSPhilip Paeps /* Check for loopback packets */ 841*b5bae9f4SAndrew Rybchenko { 842e948693eSPhilip Paeps struct ether_header *etherhp; 843e948693eSPhilip Paeps 844e948693eSPhilip Paeps /*LINTED*/ 845e948693eSPhilip Paeps etherhp = mtod(m, struct ether_header *); 846e948693eSPhilip Paeps 847e948693eSPhilip Paeps if (etherhp->ether_type == 848e948693eSPhilip Paeps htons(SFXGE_ETHERTYPE_LOOPBACK)) { 849e948693eSPhilip Paeps EFSYS_PROBE(loopback); 850e948693eSPhilip Paeps 851e948693eSPhilip Paeps rxq->loopback++; 852e948693eSPhilip Paeps goto discard; 853e948693eSPhilip Paeps } 854e948693eSPhilip Paeps } 855*b5bae9f4SAndrew Rybchenko break; 856*b5bae9f4SAndrew Rybchenko default: 857*b5bae9f4SAndrew Rybchenko KASSERT(B_FALSE, 858*b5bae9f4SAndrew Rybchenko ("Rx descriptor with both IPv4 and IPv6 flags")); 859*b5bae9f4SAndrew Rybchenko goto discard; 860*b5bae9f4SAndrew Rybchenko } 861e948693eSPhilip Paeps 862e948693eSPhilip Paeps /* Pass packet up the stack or into LRO (pipelined) */ 863e948693eSPhilip Paeps if (prev != NULL) { 8643b3390c1SAndrew Rybchenko if (lro_enabled && 8653b3390c1SAndrew Rybchenko ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) == 8663b3390c1SAndrew Rybchenko (EFX_PKT_TCP | EFX_CKSUM_TCPUDP))) 867e948693eSPhilip Paeps sfxge_lro(rxq, prev); 868e948693eSPhilip Paeps else 869e948693eSPhilip Paeps sfxge_rx_deliver(sc, prev); 870e948693eSPhilip Paeps } 871e948693eSPhilip Paeps prev = rx_desc; 872e948693eSPhilip Paeps continue; 873e948693eSPhilip Paeps 874e948693eSPhilip Paeps discard: 875e948693eSPhilip Paeps /* Return the packet to the pool */ 876e948693eSPhilip Paeps m_free(m); 877e948693eSPhilip Paeps rx_desc->mbuf = NULL; 878e948693eSPhilip Paeps } 879e948693eSPhilip Paeps rxq->completed = completed; 880e948693eSPhilip Paeps 881e948693eSPhilip Paeps level = rxq->added - rxq->completed; 882e948693eSPhilip Paeps 883e948693eSPhilip Paeps /* Pass last packet up the stack or into LRO */ 884e948693eSPhilip Paeps if (prev != NULL) { 8853b3390c1SAndrew Rybchenko if (lro_enabled && 8863b3390c1SAndrew Rybchenko ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) == 8873b3390c1SAndrew Rybchenko (EFX_PKT_TCP | EFX_CKSUM_TCPUDP))) 888e948693eSPhilip Paeps sfxge_lro(rxq, prev); 889e948693eSPhilip Paeps else 890e948693eSPhilip Paeps sfxge_rx_deliver(sc, prev); 891e948693eSPhilip Paeps } 892e948693eSPhilip Paeps 893e948693eSPhilip Paeps /* 894e948693eSPhilip Paeps * If there are any pending flows and this is the end of the 895e948693eSPhilip Paeps * poll then they must be completed. 896e948693eSPhilip Paeps */ 897e948693eSPhilip Paeps if (eop) 898e948693eSPhilip Paeps sfxge_lro_end_of_burst(rxq); 899e948693eSPhilip Paeps 900e948693eSPhilip Paeps /* Top up the queue if necessary */ 901385b1d8eSGeorge V. Neville-Neil if (level < rxq->refill_threshold) 902385b1d8eSGeorge V. Neville-Neil sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE); 903e948693eSPhilip Paeps } 904e948693eSPhilip Paeps 905e948693eSPhilip Paeps static void 906e948693eSPhilip Paeps sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index) 907e948693eSPhilip Paeps { 908e948693eSPhilip Paeps struct sfxge_rxq *rxq; 909e948693eSPhilip Paeps struct sfxge_evq *evq; 910e948693eSPhilip Paeps unsigned int count; 911e948693eSPhilip Paeps 912e948693eSPhilip Paeps rxq = sc->rxq[index]; 913e948693eSPhilip Paeps evq = sc->evq[index]; 914e948693eSPhilip Paeps 915763cab71SAndrew Rybchenko SFXGE_EVQ_LOCK(evq); 916e948693eSPhilip Paeps 917e948693eSPhilip Paeps KASSERT(rxq->init_state == SFXGE_RXQ_STARTED, 918e948693eSPhilip Paeps ("rxq not started")); 919e948693eSPhilip Paeps 920e948693eSPhilip Paeps rxq->init_state = SFXGE_RXQ_INITIALIZED; 921e948693eSPhilip Paeps 922e948693eSPhilip Paeps callout_stop(&rxq->refill_callout); 923e948693eSPhilip Paeps 924e948693eSPhilip Paeps again: 925e948693eSPhilip Paeps rxq->flush_state = SFXGE_FLUSH_PENDING; 926e948693eSPhilip Paeps 927e948693eSPhilip Paeps /* Flush the receive queue */ 928e948693eSPhilip Paeps efx_rx_qflush(rxq->common); 929e948693eSPhilip Paeps 930763cab71SAndrew Rybchenko SFXGE_EVQ_UNLOCK(evq); 931e948693eSPhilip Paeps 932e948693eSPhilip Paeps count = 0; 933e948693eSPhilip Paeps do { 934e948693eSPhilip Paeps /* Spin for 100 ms */ 935e948693eSPhilip Paeps DELAY(100000); 936e948693eSPhilip Paeps 937e948693eSPhilip Paeps if (rxq->flush_state != SFXGE_FLUSH_PENDING) 938e948693eSPhilip Paeps break; 939e948693eSPhilip Paeps 940e948693eSPhilip Paeps } while (++count < 20); 941e948693eSPhilip Paeps 942763cab71SAndrew Rybchenko SFXGE_EVQ_LOCK(evq); 943e948693eSPhilip Paeps 944e948693eSPhilip Paeps if (rxq->flush_state == SFXGE_FLUSH_FAILED) 945e948693eSPhilip Paeps goto again; 946e948693eSPhilip Paeps 947e948693eSPhilip Paeps rxq->flush_state = SFXGE_FLUSH_DONE; 948e948693eSPhilip Paeps 949e948693eSPhilip Paeps rxq->pending = rxq->added; 950e948693eSPhilip Paeps sfxge_rx_qcomplete(rxq, B_TRUE); 951e948693eSPhilip Paeps 952e948693eSPhilip Paeps KASSERT(rxq->completed == rxq->pending, 953e948693eSPhilip Paeps ("rxq->completed != rxq->pending")); 954e948693eSPhilip Paeps 955e948693eSPhilip Paeps rxq->added = 0; 956e948693eSPhilip Paeps rxq->pending = 0; 957e948693eSPhilip Paeps rxq->completed = 0; 958e948693eSPhilip Paeps rxq->loopback = 0; 959e948693eSPhilip Paeps 960e948693eSPhilip Paeps /* Destroy the common code receive queue. */ 961e948693eSPhilip Paeps efx_rx_qdestroy(rxq->common); 962e948693eSPhilip Paeps 963e948693eSPhilip Paeps efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id, 964385b1d8eSGeorge V. Neville-Neil EFX_RXQ_NBUFS(sc->rxq_entries)); 965e948693eSPhilip Paeps 966763cab71SAndrew Rybchenko SFXGE_EVQ_UNLOCK(evq); 967e948693eSPhilip Paeps } 968e948693eSPhilip Paeps 969e948693eSPhilip Paeps static int 970e948693eSPhilip Paeps sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index) 971e948693eSPhilip Paeps { 972e948693eSPhilip Paeps struct sfxge_rxq *rxq; 973e948693eSPhilip Paeps efsys_mem_t *esmp; 974e948693eSPhilip Paeps struct sfxge_evq *evq; 975e948693eSPhilip Paeps int rc; 976e948693eSPhilip Paeps 977e948693eSPhilip Paeps rxq = sc->rxq[index]; 978e948693eSPhilip Paeps esmp = &rxq->mem; 979e948693eSPhilip Paeps evq = sc->evq[index]; 980e948693eSPhilip Paeps 981e948693eSPhilip Paeps KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED, 982e948693eSPhilip Paeps ("rxq->init_state != SFXGE_RXQ_INITIALIZED")); 983e948693eSPhilip Paeps KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 984e948693eSPhilip Paeps ("evq->init_state != SFXGE_EVQ_STARTED")); 985e948693eSPhilip Paeps 986e948693eSPhilip Paeps /* Program the buffer table. */ 987e948693eSPhilip Paeps if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp, 988385b1d8eSGeorge V. Neville-Neil EFX_RXQ_NBUFS(sc->rxq_entries))) != 0) 989385b1d8eSGeorge V. Neville-Neil return (rc); 990e948693eSPhilip Paeps 991e948693eSPhilip Paeps /* Create the common code receive queue. */ 992e948693eSPhilip Paeps if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT, 993385b1d8eSGeorge V. Neville-Neil esmp, sc->rxq_entries, rxq->buf_base_id, evq->common, 994e948693eSPhilip Paeps &rxq->common)) != 0) 995e948693eSPhilip Paeps goto fail; 996e948693eSPhilip Paeps 997763cab71SAndrew Rybchenko SFXGE_EVQ_LOCK(evq); 998e948693eSPhilip Paeps 999e948693eSPhilip Paeps /* Enable the receive queue. */ 1000e948693eSPhilip Paeps efx_rx_qenable(rxq->common); 1001e948693eSPhilip Paeps 1002e948693eSPhilip Paeps rxq->init_state = SFXGE_RXQ_STARTED; 1003e948693eSPhilip Paeps 1004e948693eSPhilip Paeps /* Try to fill the queue from the pool. */ 1005385b1d8eSGeorge V. Neville-Neil sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE); 1006e948693eSPhilip Paeps 1007763cab71SAndrew Rybchenko SFXGE_EVQ_UNLOCK(evq); 1008e948693eSPhilip Paeps 1009e948693eSPhilip Paeps return (0); 1010e948693eSPhilip Paeps 1011e948693eSPhilip Paeps fail: 1012e948693eSPhilip Paeps efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id, 1013385b1d8eSGeorge V. Neville-Neil EFX_RXQ_NBUFS(sc->rxq_entries)); 1014385b1d8eSGeorge V. Neville-Neil return (rc); 1015e948693eSPhilip Paeps } 1016e948693eSPhilip Paeps 1017e948693eSPhilip Paeps void 1018e948693eSPhilip Paeps sfxge_rx_stop(struct sfxge_softc *sc) 1019e948693eSPhilip Paeps { 1020e948693eSPhilip Paeps int index; 1021e948693eSPhilip Paeps 1022e948693eSPhilip Paeps /* Stop the receive queue(s) */ 1023133366a6SAndrew Rybchenko index = sc->rxq_count; 1024e948693eSPhilip Paeps while (--index >= 0) 1025e948693eSPhilip Paeps sfxge_rx_qstop(sc, index); 1026e948693eSPhilip Paeps 1027e948693eSPhilip Paeps sc->rx_prefix_size = 0; 1028e948693eSPhilip Paeps sc->rx_buffer_size = 0; 1029e948693eSPhilip Paeps 1030e948693eSPhilip Paeps efx_rx_fini(sc->enp); 1031e948693eSPhilip Paeps } 1032e948693eSPhilip Paeps 1033e948693eSPhilip Paeps int 1034e948693eSPhilip Paeps sfxge_rx_start(struct sfxge_softc *sc) 1035e948693eSPhilip Paeps { 1036e948693eSPhilip Paeps struct sfxge_intr *intr; 1037e948693eSPhilip Paeps int index; 1038e948693eSPhilip Paeps int rc; 1039e948693eSPhilip Paeps 1040e948693eSPhilip Paeps intr = &sc->intr; 1041e948693eSPhilip Paeps 1042e948693eSPhilip Paeps /* Initialize the common code receive module. */ 1043e948693eSPhilip Paeps if ((rc = efx_rx_init(sc->enp)) != 0) 1044e948693eSPhilip Paeps return (rc); 1045e948693eSPhilip Paeps 1046e948693eSPhilip Paeps /* Calculate the receive packet buffer size. */ 1047e948693eSPhilip Paeps sc->rx_prefix_size = EFX_RX_PREFIX_SIZE; 1048e948693eSPhilip Paeps sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) + 1049e948693eSPhilip Paeps sc->rx_prefix_size); 1050e948693eSPhilip Paeps 1051e948693eSPhilip Paeps /* Select zone for packet buffers */ 1052e948693eSPhilip Paeps if (sc->rx_buffer_size <= MCLBYTES) 1053e948693eSPhilip Paeps sc->rx_buffer_zone = zone_clust; 1054e948693eSPhilip Paeps else if (sc->rx_buffer_size <= MJUMPAGESIZE) 1055e948693eSPhilip Paeps sc->rx_buffer_zone = zone_jumbop; 1056e948693eSPhilip Paeps else if (sc->rx_buffer_size <= MJUM9BYTES) 1057e948693eSPhilip Paeps sc->rx_buffer_zone = zone_jumbo9; 1058e948693eSPhilip Paeps else 1059e948693eSPhilip Paeps sc->rx_buffer_zone = zone_jumbo16; 1060e948693eSPhilip Paeps 1061e948693eSPhilip Paeps /* 1062e948693eSPhilip Paeps * Set up the scale table. Enable all hash types and hash insertion. 1063e948693eSPhilip Paeps */ 1064e948693eSPhilip Paeps for (index = 0; index < SFXGE_RX_SCALE_MAX; index++) 1065133366a6SAndrew Rybchenko sc->rx_indir_table[index] = index % sc->rxq_count; 1066e948693eSPhilip Paeps if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table, 1067e948693eSPhilip Paeps SFXGE_RX_SCALE_MAX)) != 0) 1068e948693eSPhilip Paeps goto fail; 1069e948693eSPhilip Paeps (void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ, 1070e948693eSPhilip Paeps (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) | 1071e948693eSPhilip Paeps (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE); 1072e948693eSPhilip Paeps 1073e948693eSPhilip Paeps if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key, 1074e948693eSPhilip Paeps sizeof(toep_key))) != 0) 1075e948693eSPhilip Paeps goto fail; 1076e948693eSPhilip Paeps 1077e948693eSPhilip Paeps /* Start the receive queue(s). */ 1078133366a6SAndrew Rybchenko for (index = 0; index < sc->rxq_count; index++) { 1079e948693eSPhilip Paeps if ((rc = sfxge_rx_qstart(sc, index)) != 0) 1080e948693eSPhilip Paeps goto fail2; 1081e948693eSPhilip Paeps } 1082e948693eSPhilip Paeps 1083e948693eSPhilip Paeps return (0); 1084e948693eSPhilip Paeps 1085e948693eSPhilip Paeps fail2: 1086e948693eSPhilip Paeps while (--index >= 0) 1087e948693eSPhilip Paeps sfxge_rx_qstop(sc, index); 1088e948693eSPhilip Paeps 1089e948693eSPhilip Paeps fail: 1090e948693eSPhilip Paeps efx_rx_fini(sc->enp); 1091e948693eSPhilip Paeps 1092e948693eSPhilip Paeps return (rc); 1093e948693eSPhilip Paeps } 1094e948693eSPhilip Paeps 109518daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO 109618daa0eeSAndrew Rybchenko 1097e948693eSPhilip Paeps static void sfxge_lro_init(struct sfxge_rxq *rxq) 1098e948693eSPhilip Paeps { 1099e948693eSPhilip Paeps struct sfxge_lro_state *st = &rxq->lro; 1100e948693eSPhilip Paeps unsigned i; 1101e948693eSPhilip Paeps 1102e948693eSPhilip Paeps st->conns_mask = lro_table_size - 1; 1103e948693eSPhilip Paeps KASSERT(!((st->conns_mask + 1) & st->conns_mask), 1104e948693eSPhilip Paeps ("lro_table_size must be a power of 2")); 1105e948693eSPhilip Paeps st->sc = rxq->sc; 1106e948693eSPhilip Paeps st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]), 1107e948693eSPhilip Paeps M_SFXGE, M_WAITOK); 1108e948693eSPhilip Paeps st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]), 1109e948693eSPhilip Paeps M_SFXGE, M_WAITOK); 1110e948693eSPhilip Paeps for (i = 0; i <= st->conns_mask; ++i) { 1111e948693eSPhilip Paeps TAILQ_INIT(&st->conns[i]); 1112e948693eSPhilip Paeps st->conns_n[i] = 0; 1113e948693eSPhilip Paeps } 1114e948693eSPhilip Paeps LIST_INIT(&st->active_conns); 1115e948693eSPhilip Paeps TAILQ_INIT(&st->free_conns); 1116e948693eSPhilip Paeps } 1117e948693eSPhilip Paeps 1118e948693eSPhilip Paeps static void sfxge_lro_fini(struct sfxge_rxq *rxq) 1119e948693eSPhilip Paeps { 1120e948693eSPhilip Paeps struct sfxge_lro_state *st = &rxq->lro; 1121e948693eSPhilip Paeps struct sfxge_lro_conn *c; 1122e948693eSPhilip Paeps unsigned i; 1123e948693eSPhilip Paeps 1124e948693eSPhilip Paeps /* Return cleanly if sfxge_lro_init() has not been called. */ 1125e948693eSPhilip Paeps if (st->conns == NULL) 1126e948693eSPhilip Paeps return; 1127e948693eSPhilip Paeps 1128e948693eSPhilip Paeps KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections")); 1129e948693eSPhilip Paeps 1130e948693eSPhilip Paeps for (i = 0; i <= st->conns_mask; ++i) { 1131e948693eSPhilip Paeps while (!TAILQ_EMPTY(&st->conns[i])) { 1132e948693eSPhilip Paeps c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq); 1133e948693eSPhilip Paeps sfxge_lro_drop(rxq, c); 1134e948693eSPhilip Paeps } 1135e948693eSPhilip Paeps } 1136e948693eSPhilip Paeps 1137e948693eSPhilip Paeps while (!TAILQ_EMPTY(&st->free_conns)) { 1138e948693eSPhilip Paeps c = TAILQ_FIRST(&st->free_conns); 1139e948693eSPhilip Paeps TAILQ_REMOVE(&st->free_conns, c, link); 1140e948693eSPhilip Paeps KASSERT(!c->mbuf, ("found orphaned mbuf")); 1141e948693eSPhilip Paeps free(c, M_SFXGE); 1142e948693eSPhilip Paeps } 1143e948693eSPhilip Paeps 1144e948693eSPhilip Paeps free(st->conns_n, M_SFXGE); 1145e948693eSPhilip Paeps free(st->conns, M_SFXGE); 1146e948693eSPhilip Paeps st->conns = NULL; 1147e948693eSPhilip Paeps } 1148e948693eSPhilip Paeps 114918daa0eeSAndrew Rybchenko #else 115018daa0eeSAndrew Rybchenko 115118daa0eeSAndrew Rybchenko static void 115218daa0eeSAndrew Rybchenko sfxge_lro_init(struct sfxge_rxq *rxq) 115318daa0eeSAndrew Rybchenko { 115418daa0eeSAndrew Rybchenko } 115518daa0eeSAndrew Rybchenko 115618daa0eeSAndrew Rybchenko static void 115718daa0eeSAndrew Rybchenko sfxge_lro_fini(struct sfxge_rxq *rxq) 115818daa0eeSAndrew Rybchenko { 115918daa0eeSAndrew Rybchenko } 116018daa0eeSAndrew Rybchenko 116118daa0eeSAndrew Rybchenko #endif /* SFXGE_LRO */ 116218daa0eeSAndrew Rybchenko 1163e948693eSPhilip Paeps static void 1164e948693eSPhilip Paeps sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index) 1165e948693eSPhilip Paeps { 1166e948693eSPhilip Paeps struct sfxge_rxq *rxq; 1167e948693eSPhilip Paeps 1168e948693eSPhilip Paeps rxq = sc->rxq[index]; 1169e948693eSPhilip Paeps 1170e948693eSPhilip Paeps KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED, 1171e948693eSPhilip Paeps ("rxq->init_state != SFXGE_RXQ_INITIALIZED")); 1172e948693eSPhilip Paeps 1173e948693eSPhilip Paeps /* Free the context array and the flow table. */ 1174e948693eSPhilip Paeps free(rxq->queue, M_SFXGE); 1175e948693eSPhilip Paeps sfxge_lro_fini(rxq); 1176e948693eSPhilip Paeps 1177e948693eSPhilip Paeps /* Release DMA memory. */ 1178e948693eSPhilip Paeps sfxge_dma_free(&rxq->mem); 1179e948693eSPhilip Paeps 1180e948693eSPhilip Paeps sc->rxq[index] = NULL; 1181e948693eSPhilip Paeps 1182e948693eSPhilip Paeps free(rxq, M_SFXGE); 1183e948693eSPhilip Paeps } 1184e948693eSPhilip Paeps 1185e948693eSPhilip Paeps static int 1186e948693eSPhilip Paeps sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index) 1187e948693eSPhilip Paeps { 1188e948693eSPhilip Paeps struct sfxge_rxq *rxq; 1189e948693eSPhilip Paeps struct sfxge_evq *evq; 1190e948693eSPhilip Paeps efsys_mem_t *esmp; 1191e948693eSPhilip Paeps int rc; 1192e948693eSPhilip Paeps 1193133366a6SAndrew Rybchenko KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count)); 1194e948693eSPhilip Paeps 1195e948693eSPhilip Paeps rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK); 1196e948693eSPhilip Paeps rxq->sc = sc; 1197e948693eSPhilip Paeps rxq->index = index; 1198385b1d8eSGeorge V. Neville-Neil rxq->entries = sc->rxq_entries; 1199385b1d8eSGeorge V. Neville-Neil rxq->ptr_mask = rxq->entries - 1; 1200385b1d8eSGeorge V. Neville-Neil rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries); 1201e948693eSPhilip Paeps 1202e948693eSPhilip Paeps sc->rxq[index] = rxq; 1203e948693eSPhilip Paeps esmp = &rxq->mem; 1204e948693eSPhilip Paeps 1205e948693eSPhilip Paeps evq = sc->evq[index]; 1206e948693eSPhilip Paeps 1207e948693eSPhilip Paeps /* Allocate and zero DMA space. */ 1208385b1d8eSGeorge V. Neville-Neil if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0) 1209e948693eSPhilip Paeps return (rc); 1210e948693eSPhilip Paeps 1211e948693eSPhilip Paeps /* Allocate buffer table entries. */ 1212385b1d8eSGeorge V. Neville-Neil sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries), 1213e948693eSPhilip Paeps &rxq->buf_base_id); 1214e948693eSPhilip Paeps 1215e948693eSPhilip Paeps /* Allocate the context array and the flow table. */ 1216385b1d8eSGeorge V. Neville-Neil rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries, 1217e948693eSPhilip Paeps M_SFXGE, M_WAITOK | M_ZERO); 1218e948693eSPhilip Paeps sfxge_lro_init(rxq); 1219e948693eSPhilip Paeps 1220e948693eSPhilip Paeps callout_init(&rxq->refill_callout, B_TRUE); 1221e948693eSPhilip Paeps 1222e948693eSPhilip Paeps rxq->init_state = SFXGE_RXQ_INITIALIZED; 1223e948693eSPhilip Paeps 1224e948693eSPhilip Paeps return (0); 1225e948693eSPhilip Paeps } 1226e948693eSPhilip Paeps 1227e948693eSPhilip Paeps static const struct { 1228e948693eSPhilip Paeps const char *name; 1229e948693eSPhilip Paeps size_t offset; 1230e948693eSPhilip Paeps } sfxge_rx_stats[] = { 1231e948693eSPhilip Paeps #define SFXGE_RX_STAT(name, member) \ 1232e948693eSPhilip Paeps { #name, offsetof(struct sfxge_rxq, member) } 123318daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO 1234e948693eSPhilip Paeps SFXGE_RX_STAT(lro_merges, lro.n_merges), 1235e948693eSPhilip Paeps SFXGE_RX_STAT(lro_bursts, lro.n_bursts), 1236e948693eSPhilip Paeps SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start), 1237e948693eSPhilip Paeps SFXGE_RX_STAT(lro_misorder, lro.n_misorder), 1238e948693eSPhilip Paeps SFXGE_RX_STAT(lro_too_many, lro.n_too_many), 1239e948693eSPhilip Paeps SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream), 1240e948693eSPhilip Paeps SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle), 1241e948693eSPhilip Paeps SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed) 124218daa0eeSAndrew Rybchenko #endif 1243e948693eSPhilip Paeps }; 1244e948693eSPhilip Paeps 1245e948693eSPhilip Paeps static int 1246e948693eSPhilip Paeps sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS) 1247e948693eSPhilip Paeps { 1248e948693eSPhilip Paeps struct sfxge_softc *sc = arg1; 1249e948693eSPhilip Paeps unsigned int id = arg2; 1250e948693eSPhilip Paeps unsigned int sum, index; 1251e948693eSPhilip Paeps 1252e948693eSPhilip Paeps /* Sum across all RX queues */ 1253e948693eSPhilip Paeps sum = 0; 1254133366a6SAndrew Rybchenko for (index = 0; index < sc->rxq_count; index++) 1255e948693eSPhilip Paeps sum += *(unsigned int *)((caddr_t)sc->rxq[index] + 1256e948693eSPhilip Paeps sfxge_rx_stats[id].offset); 1257e948693eSPhilip Paeps 1258b7b0edd1SGeorge V. Neville-Neil return (SYSCTL_OUT(req, &sum, sizeof(sum))); 1259e948693eSPhilip Paeps } 1260e948693eSPhilip Paeps 1261e948693eSPhilip Paeps static void 1262e948693eSPhilip Paeps sfxge_rx_stat_init(struct sfxge_softc *sc) 1263e948693eSPhilip Paeps { 1264e948693eSPhilip Paeps struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); 1265e948693eSPhilip Paeps struct sysctl_oid_list *stat_list; 1266e948693eSPhilip Paeps unsigned int id; 1267e948693eSPhilip Paeps 1268e948693eSPhilip Paeps stat_list = SYSCTL_CHILDREN(sc->stats_node); 1269e948693eSPhilip Paeps 1270612d8e28SAndrew Rybchenko for (id = 0; id < nitems(sfxge_rx_stats); id++) { 1271e948693eSPhilip Paeps SYSCTL_ADD_PROC( 1272e948693eSPhilip Paeps ctx, stat_list, 1273e948693eSPhilip Paeps OID_AUTO, sfxge_rx_stats[id].name, 1274e948693eSPhilip Paeps CTLTYPE_UINT|CTLFLAG_RD, 1275e948693eSPhilip Paeps sc, id, sfxge_rx_stat_handler, "IU", 1276e948693eSPhilip Paeps ""); 1277e948693eSPhilip Paeps } 1278e948693eSPhilip Paeps } 1279e948693eSPhilip Paeps 1280e948693eSPhilip Paeps void 1281e948693eSPhilip Paeps sfxge_rx_fini(struct sfxge_softc *sc) 1282e948693eSPhilip Paeps { 1283e948693eSPhilip Paeps int index; 1284e948693eSPhilip Paeps 1285133366a6SAndrew Rybchenko index = sc->rxq_count; 1286e948693eSPhilip Paeps while (--index >= 0) 1287e948693eSPhilip Paeps sfxge_rx_qfini(sc, index); 1288133366a6SAndrew Rybchenko 1289133366a6SAndrew Rybchenko sc->rxq_count = 0; 1290e948693eSPhilip Paeps } 1291e948693eSPhilip Paeps 1292e948693eSPhilip Paeps int 1293e948693eSPhilip Paeps sfxge_rx_init(struct sfxge_softc *sc) 1294e948693eSPhilip Paeps { 1295e948693eSPhilip Paeps struct sfxge_intr *intr; 1296e948693eSPhilip Paeps int index; 1297e948693eSPhilip Paeps int rc; 1298e948693eSPhilip Paeps 129918daa0eeSAndrew Rybchenko #ifdef SFXGE_LRO 1300245d1576SAndrew Rybchenko if (!ISP2(lro_table_size)) { 1301245d1576SAndrew Rybchenko log(LOG_ERR, "%s=%u must be power of 2", 1302245d1576SAndrew Rybchenko SFXGE_LRO_PARAM(table_size), lro_table_size); 1303245d1576SAndrew Rybchenko rc = EINVAL; 1304245d1576SAndrew Rybchenko goto fail_lro_table_size; 1305245d1576SAndrew Rybchenko } 1306245d1576SAndrew Rybchenko 1307e948693eSPhilip Paeps if (lro_idle_ticks == 0) 1308e948693eSPhilip Paeps lro_idle_ticks = hz / 10 + 1; /* 100 ms */ 130918daa0eeSAndrew Rybchenko #endif 1310e948693eSPhilip Paeps 1311e948693eSPhilip Paeps intr = &sc->intr; 1312e948693eSPhilip Paeps 1313133366a6SAndrew Rybchenko sc->rxq_count = intr->n_alloc; 1314133366a6SAndrew Rybchenko 1315e948693eSPhilip Paeps KASSERT(intr->state == SFXGE_INTR_INITIALIZED, 1316e948693eSPhilip Paeps ("intr->state != SFXGE_INTR_INITIALIZED")); 1317e948693eSPhilip Paeps 1318e948693eSPhilip Paeps /* Initialize the receive queue(s) - one per interrupt. */ 1319133366a6SAndrew Rybchenko for (index = 0; index < sc->rxq_count; index++) { 1320e948693eSPhilip Paeps if ((rc = sfxge_rx_qinit(sc, index)) != 0) 1321e948693eSPhilip Paeps goto fail; 1322e948693eSPhilip Paeps } 1323e948693eSPhilip Paeps 1324e948693eSPhilip Paeps sfxge_rx_stat_init(sc); 1325e948693eSPhilip Paeps 1326e948693eSPhilip Paeps return (0); 1327e948693eSPhilip Paeps 1328e948693eSPhilip Paeps fail: 1329e948693eSPhilip Paeps /* Tear down the receive queue(s). */ 1330e948693eSPhilip Paeps while (--index >= 0) 1331e948693eSPhilip Paeps sfxge_rx_qfini(sc, index); 1332e948693eSPhilip Paeps 1333133366a6SAndrew Rybchenko sc->rxq_count = 0; 1334245d1576SAndrew Rybchenko 1335245d1576SAndrew Rybchenko #ifdef SFXGE_LRO 1336245d1576SAndrew Rybchenko fail_lro_table_size: 1337245d1576SAndrew Rybchenko #endif 1338e948693eSPhilip Paeps return (rc); 1339e948693eSPhilip Paeps } 1340