17527624eSRobert Watson /*- 27527624eSRobert Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. 37527624eSRobert Watson * All rights reserved. 47527624eSRobert Watson * 57527624eSRobert Watson * This software was developed by Robert N. M. Watson under contract 67527624eSRobert Watson * to Juniper Networks, Inc. 77527624eSRobert Watson * 87527624eSRobert Watson * Redistribution and use in source and binary forms, with or without 97527624eSRobert Watson * modification, are permitted provided that the following conditions 107527624eSRobert Watson * are met: 117527624eSRobert Watson * 1. Redistributions of source code must retain the above copyright 127527624eSRobert Watson * notice, this list of conditions and the following disclaimer. 137527624eSRobert Watson * 2. Redistributions in binary form must reproduce the above copyright 147527624eSRobert Watson * notice, this list of conditions and the following disclaimer in the 157527624eSRobert Watson * documentation and/or other materials provided with the distribution. 167527624eSRobert Watson * 177527624eSRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 187527624eSRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 197527624eSRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 207527624eSRobert Watson * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 217527624eSRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 227527624eSRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 237527624eSRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 247527624eSRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 257527624eSRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 267527624eSRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 277527624eSRobert Watson * SUCH DAMAGE. 287527624eSRobert Watson */ 297527624eSRobert Watson 307527624eSRobert Watson #include <sys/cdefs.h> 317527624eSRobert Watson 327527624eSRobert Watson __FBSDID("$FreeBSD$"); 337527624eSRobert Watson 347527624eSRobert Watson #include "opt_inet6.h" 357527624eSRobert Watson #include "opt_pcbgroup.h" 367527624eSRobert Watson 377527624eSRobert Watson #ifndef PCBGROUP 387527624eSRobert Watson #error "options RSS depends on options PCBGROUP" 397527624eSRobert Watson #endif 407527624eSRobert Watson 417527624eSRobert Watson #include <sys/param.h> 427527624eSRobert Watson #include <sys/mbuf.h> 437527624eSRobert Watson #include <sys/socket.h> 447527624eSRobert Watson #include <sys/priv.h> 457527624eSRobert Watson #include <sys/kernel.h> 467527624eSRobert Watson #include <sys/smp.h> 477527624eSRobert Watson #include <sys/sysctl.h> 488bde802aSAdrian Chadd #include <sys/sbuf.h> 497527624eSRobert Watson 507527624eSRobert Watson #include <net/if.h> 517527624eSRobert Watson #include <net/if_var.h> 527527624eSRobert Watson #include <net/netisr.h> 53b2bdc62aSAdrian Chadd #include <net/rss_config.h> 547527624eSRobert Watson 557527624eSRobert Watson #include <netinet/in.h> 567527624eSRobert Watson #include <netinet/in_pcb.h> 577527624eSRobert Watson #include <netinet/in_rss.h> 587527624eSRobert Watson 
#include <netinet/in_var.h>

/* for software rss hash support */
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

/*
 * Compute the RSS hash of an IPv4 source/destination address pair.
 *
 * The two addresses are packed back to back, source first, exactly as
 * they were passed in, and the packed buffer is handed to rss_hash().
 */
uint32_t
rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst)
{
	uint8_t key[sizeof(src) + sizeof(dst)];
	uint8_t *cursor;

	cursor = key;
	bcopy(&src, cursor, sizeof(src));
	cursor += sizeof(src);
	bcopy(&dst, cursor, sizeof(dst));
	cursor += sizeof(dst);

	/* cursor now points one past the last byte written. */
	return (rss_hash((u_int)(cursor - key), key));
}

/*
 * Hash an IPv4 4-tuple.
847527624eSRobert Watson */ 857527624eSRobert Watson uint32_t 867527624eSRobert Watson rss_hash_ip4_4tuple(struct in_addr src, u_short srcport, struct in_addr dst, 877527624eSRobert Watson u_short dstport) 887527624eSRobert Watson { 897527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 907527624eSRobert Watson sizeof(dstport)]; 917527624eSRobert Watson u_int datalen; 927527624eSRobert Watson 937527624eSRobert Watson datalen = 0; 947527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 957527624eSRobert Watson datalen += sizeof(src); 967527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 977527624eSRobert Watson datalen += sizeof(dst); 987527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 997527624eSRobert Watson datalen += sizeof(srcport); 1007527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 1017527624eSRobert Watson datalen += sizeof(dstport); 1027527624eSRobert Watson return (rss_hash(datalen, data)); 1037527624eSRobert Watson } 1047527624eSRobert Watson 1057527624eSRobert Watson /* 10672d33245SAdrian Chadd * Calculate an appropriate ipv4 2-tuple or 4-tuple given the given 10772d33245SAdrian Chadd * IPv4 source/destination address, UDP or TCP source/destination ports 10872d33245SAdrian Chadd * and the protocol type. 10972d33245SAdrian Chadd * 11072d33245SAdrian Chadd * The protocol code may wish to do a software hash of the given 11172d33245SAdrian Chadd * tuple. This depends upon the currently configured RSS hash types. 11272d33245SAdrian Chadd * 11372d33245SAdrian Chadd * This assumes that the packet in question isn't a fragment. 11472d33245SAdrian Chadd * 11572d33245SAdrian Chadd * It also assumes the packet source/destination address 11672d33245SAdrian Chadd * are in "incoming" packet order (ie, source is "far" address.) 
11772d33245SAdrian Chadd */ 11872d33245SAdrian Chadd int 11972d33245SAdrian Chadd rss_proto_software_hash_v4(struct in_addr s, struct in_addr d, 12072d33245SAdrian Chadd u_short sp, u_short dp, int proto, 12172d33245SAdrian Chadd uint32_t *hashval, uint32_t *hashtype) 12272d33245SAdrian Chadd { 12372d33245SAdrian Chadd uint32_t hash; 12472d33245SAdrian Chadd 12572d33245SAdrian Chadd /* 12672d33245SAdrian Chadd * Next, choose the hash type depending upon the protocol 12772d33245SAdrian Chadd * identifier. 12872d33245SAdrian Chadd */ 12972d33245SAdrian Chadd if ((proto == IPPROTO_TCP) && 130b2bdc62aSAdrian Chadd (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { 13172d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 13272d33245SAdrian Chadd *hashval = hash; 13372d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_TCP_IPV4; 13472d33245SAdrian Chadd return (0); 13572d33245SAdrian Chadd } else if ((proto == IPPROTO_UDP) && 136b2bdc62aSAdrian Chadd (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { 13772d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 13872d33245SAdrian Chadd *hashval = hash; 13972d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_UDP_IPV4; 14072d33245SAdrian Chadd return (0); 141b2bdc62aSAdrian Chadd } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) { 14272d33245SAdrian Chadd /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ 14372d33245SAdrian Chadd hash = rss_hash_ip4_2tuple(s, d); 14472d33245SAdrian Chadd *hashval = hash; 14572d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_IPV4; 14672d33245SAdrian Chadd return (0); 14772d33245SAdrian Chadd } 14872d33245SAdrian Chadd 14972d33245SAdrian Chadd /* No configured available hashtypes! */ 150e5562eb9SAdrian Chadd RSS_DEBUG("no available hashtypes!\n"); 15172d33245SAdrian Chadd return (-1); 15272d33245SAdrian Chadd } 15372d33245SAdrian Chadd 15472d33245SAdrian Chadd /* 15572d33245SAdrian Chadd * Do a software calculation of the RSS for the given mbuf. 
15672d33245SAdrian Chadd * 15772d33245SAdrian Chadd * This is typically used by the input path to recalculate the RSS after 15872d33245SAdrian Chadd * some form of packet processing (eg de-capsulation, IP fragment reassembly.) 15972d33245SAdrian Chadd * 16072d33245SAdrian Chadd * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and 16172d33245SAdrian Chadd * RSS_HASH_PKT_EGRESS for outgoing. 16272d33245SAdrian Chadd * 16372d33245SAdrian Chadd * Returns 0 if a hash was done, -1 if no hash was done, +1 if 16472d33245SAdrian Chadd * the mbuf already had a valid RSS flowid. 16572d33245SAdrian Chadd * 16672d33245SAdrian Chadd * This function doesn't modify the mbuf. It's up to the caller to 16772d33245SAdrian Chadd * assign flowid/flowtype as appropriate. 16872d33245SAdrian Chadd */ 16972d33245SAdrian Chadd int 17072d33245SAdrian Chadd rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval, 17172d33245SAdrian Chadd uint32_t *hashtype) 17272d33245SAdrian Chadd { 17372d33245SAdrian Chadd const struct ip *ip; 17472d33245SAdrian Chadd const struct tcphdr *th; 17572d33245SAdrian Chadd const struct udphdr *uh; 176c2529042SHans Petter Selasky uint32_t flowid; 177c2529042SHans Petter Selasky uint32_t flowtype; 17872d33245SAdrian Chadd uint8_t proto; 17972d33245SAdrian Chadd int iphlen; 18072d33245SAdrian Chadd int is_frag = 0; 18172d33245SAdrian Chadd 18272d33245SAdrian Chadd /* 18372d33245SAdrian Chadd * XXX For now this only handles hashing on incoming mbufs. 18472d33245SAdrian Chadd */ 18572d33245SAdrian Chadd if (dir != RSS_HASH_PKT_INGRESS) { 186e5562eb9SAdrian Chadd RSS_DEBUG("called on EGRESS packet!\n"); 18772d33245SAdrian Chadd return (-1); 18872d33245SAdrian Chadd } 18972d33245SAdrian Chadd 19072d33245SAdrian Chadd /* 19172d33245SAdrian Chadd * First, validate that the mbuf we have is long enough 19272d33245SAdrian Chadd * to have an IPv4 header in it. 
19372d33245SAdrian Chadd */ 19472d33245SAdrian Chadd if (m->m_pkthdr.len < (sizeof(struct ip))) { 195e5562eb9SAdrian Chadd RSS_DEBUG("short mbuf pkthdr\n"); 19672d33245SAdrian Chadd return (-1); 19772d33245SAdrian Chadd } 19872d33245SAdrian Chadd if (m->m_len < (sizeof(struct ip))) { 199e5562eb9SAdrian Chadd RSS_DEBUG("short mbuf len\n"); 20072d33245SAdrian Chadd return (-1); 20172d33245SAdrian Chadd } 20272d33245SAdrian Chadd 20372d33245SAdrian Chadd /* Ok, let's dereference that */ 20472d33245SAdrian Chadd ip = mtod(m, struct ip *); 20572d33245SAdrian Chadd proto = ip->ip_p; 20672d33245SAdrian Chadd iphlen = ip->ip_hl << 2; 20772d33245SAdrian Chadd 20872d33245SAdrian Chadd /* 20972d33245SAdrian Chadd * If this is a fragment then it shouldn't be four-tuple 21072d33245SAdrian Chadd * hashed just yet. Once it's reassembled into a full 21172d33245SAdrian Chadd * frame it should be re-hashed. 21272d33245SAdrian Chadd */ 21372d33245SAdrian Chadd if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) 21472d33245SAdrian Chadd is_frag = 1; 21572d33245SAdrian Chadd 21672d33245SAdrian Chadd /* 21772d33245SAdrian Chadd * If the mbuf flowid/flowtype matches the packet type, 21872d33245SAdrian Chadd * and we don't support the 4-tuple version of the given protocol, 21972d33245SAdrian Chadd * then signal to the owner that it can trust the flowid/flowtype 22072d33245SAdrian Chadd * details. 22172d33245SAdrian Chadd * 22272d33245SAdrian Chadd * This is a little picky - eg, if TCPv4 / UDPv4 hashing 22372d33245SAdrian Chadd * is supported but we got a TCP/UDP frame only 2-tuple hashed, 22472d33245SAdrian Chadd * then we shouldn't just "trust" the 2-tuple hash. We need 22572d33245SAdrian Chadd * a 4-tuple hash. 
22672d33245SAdrian Chadd */ 22772d33245SAdrian Chadd flowid = m->m_pkthdr.flowid; 22872d33245SAdrian Chadd flowtype = M_HASHTYPE_GET(m); 22972d33245SAdrian Chadd 230c2529042SHans Petter Selasky if (flowtype != M_HASHTYPE_NONE) { 23172d33245SAdrian Chadd switch (proto) { 23272d33245SAdrian Chadd case IPPROTO_UDP: 233b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) && 23472d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_UDP_IPV4) && 23572d33245SAdrian Chadd (is_frag == 0)) { 23672d33245SAdrian Chadd return (1); 23772d33245SAdrian Chadd } 23872d33245SAdrian Chadd /* 23972d33245SAdrian Chadd * Only allow 2-tuple for UDP frames if we don't also 24072d33245SAdrian Chadd * support 4-tuple for UDP. 24172d33245SAdrian Chadd */ 242b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) && 243b2bdc62aSAdrian Chadd ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) == 0) && 24472d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 24572d33245SAdrian Chadd return (1); 24672d33245SAdrian Chadd } 24772d33245SAdrian Chadd break; 24872d33245SAdrian Chadd case IPPROTO_TCP: 249b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) && 25072d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_TCP_IPV4) && 25172d33245SAdrian Chadd (is_frag == 0)) { 25272d33245SAdrian Chadd return (1); 25372d33245SAdrian Chadd } 25472d33245SAdrian Chadd /* 25572d33245SAdrian Chadd * Only allow 2-tuple for TCP frames if we don't also 25672d33245SAdrian Chadd * support 2-tuple for TCP. 
25772d33245SAdrian Chadd */ 258b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) && 259b2bdc62aSAdrian Chadd ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) == 0) && 26072d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 26172d33245SAdrian Chadd return (1); 26272d33245SAdrian Chadd } 26372d33245SAdrian Chadd break; 26472d33245SAdrian Chadd default: 265b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) && 26672d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 26772d33245SAdrian Chadd return (1); 26872d33245SAdrian Chadd } 26972d33245SAdrian Chadd break; 27072d33245SAdrian Chadd } 27172d33245SAdrian Chadd } 27272d33245SAdrian Chadd 27372d33245SAdrian Chadd /* 27472d33245SAdrian Chadd * Decode enough information to make a hash decision. 27572d33245SAdrian Chadd * 27672d33245SAdrian Chadd * XXX TODO: does the hardware hash on 4-tuple if IP 27772d33245SAdrian Chadd * options are present? 27872d33245SAdrian Chadd */ 279b2bdc62aSAdrian Chadd if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) && 280f4659f4cSAdrian Chadd (proto == IPPROTO_TCP) && 281f4659f4cSAdrian Chadd (is_frag == 0)) { 28272d33245SAdrian Chadd if (m->m_len < iphlen + sizeof(struct tcphdr)) { 283e5562eb9SAdrian Chadd RSS_DEBUG("short TCP frame?\n"); 28472d33245SAdrian Chadd return (-1); 28572d33245SAdrian Chadd } 2863b272782SAdrian Chadd th = (const struct tcphdr *)((c_caddr_t)ip + iphlen); 28772d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 28872d33245SAdrian Chadd th->th_sport, 28972d33245SAdrian Chadd th->th_dport, 29072d33245SAdrian Chadd proto, 29172d33245SAdrian Chadd hashval, 29272d33245SAdrian Chadd hashtype); 293b2bdc62aSAdrian Chadd } else if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) && 294f4659f4cSAdrian Chadd (proto == IPPROTO_UDP) && 295f4659f4cSAdrian Chadd (is_frag == 0)) { 2963b272782SAdrian Chadd uh = (const struct udphdr *)((c_caddr_t)ip + iphlen); 29772d33245SAdrian Chadd if (m->m_len < 
iphlen + sizeof(struct udphdr)) { 298e5562eb9SAdrian Chadd RSS_DEBUG("short UDP frame?\n"); 29972d33245SAdrian Chadd return (-1); 30072d33245SAdrian Chadd } 30172d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 30272d33245SAdrian Chadd uh->uh_sport, 30372d33245SAdrian Chadd uh->uh_dport, 30472d33245SAdrian Chadd proto, 30572d33245SAdrian Chadd hashval, 30672d33245SAdrian Chadd hashtype); 307b2bdc62aSAdrian Chadd } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) { 30872d33245SAdrian Chadd /* Default to 2-tuple hash */ 30972d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 31072d33245SAdrian Chadd 0, /* source port */ 31172d33245SAdrian Chadd 0, /* destination port */ 31272d33245SAdrian Chadd 0, /* IPPROTO_IP */ 31372d33245SAdrian Chadd hashval, 31472d33245SAdrian Chadd hashtype); 315f4659f4cSAdrian Chadd } else { 316e5562eb9SAdrian Chadd RSS_DEBUG("no available hashtypes!\n"); 317f4659f4cSAdrian Chadd return (-1); 31872d33245SAdrian Chadd } 31972d33245SAdrian Chadd } 32072d33245SAdrian Chadd 32172d33245SAdrian Chadd /* 32272d33245SAdrian Chadd * Similar to rss_m2cpuid, but designed to be used by the IP NETISR 32372d33245SAdrian Chadd * on incoming frames. 32472d33245SAdrian Chadd * 32572d33245SAdrian Chadd * If an existing RSS hash exists and it matches what the configured 32672d33245SAdrian Chadd * hashing is, then use it. 32772d33245SAdrian Chadd * 32872d33245SAdrian Chadd * If there's an existing RSS hash but the desired hash is different, 32972d33245SAdrian Chadd * or if there's no useful RSS hash, then calculate it via 33072d33245SAdrian Chadd * the software path. 33172d33245SAdrian Chadd * 33272d33245SAdrian Chadd * XXX TODO: definitely want statistics here! 
33372d33245SAdrian Chadd */ 33472d33245SAdrian Chadd struct mbuf * 335*2527ccadSAdrian Chadd rss_soft_m2cpuid_v4(struct mbuf *m, uintptr_t source, u_int *cpuid) 33672d33245SAdrian Chadd { 33772d33245SAdrian Chadd uint32_t hash_val, hash_type; 33872d33245SAdrian Chadd int ret; 33972d33245SAdrian Chadd 34072d33245SAdrian Chadd M_ASSERTPKTHDR(m); 34172d33245SAdrian Chadd 34272d33245SAdrian Chadd ret = rss_mbuf_software_hash_v4(m, RSS_HASH_PKT_INGRESS, 34372d33245SAdrian Chadd &hash_val, &hash_type); 34472d33245SAdrian Chadd if (ret > 0) { 34572d33245SAdrian Chadd /* mbuf has a valid hash already; don't need to modify it */ 34672d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 34772d33245SAdrian Chadd } else if (ret == 0) { 34872d33245SAdrian Chadd /* hash was done; update */ 34972d33245SAdrian Chadd m->m_pkthdr.flowid = hash_val; 35072d33245SAdrian Chadd M_HASHTYPE_SET(m, hash_type); 35172d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 35272d33245SAdrian Chadd } else { /* ret < 0 */ 35372d33245SAdrian Chadd /* no hash was done */ 35472d33245SAdrian Chadd *cpuid = NETISR_CPUID_NONE; 35572d33245SAdrian Chadd } 35672d33245SAdrian Chadd return (m); 35772d33245SAdrian Chadd } 358