17527624eSRobert Watson /*- 27527624eSRobert Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. 37527624eSRobert Watson * All rights reserved. 47527624eSRobert Watson * 57527624eSRobert Watson * This software was developed by Robert N. M. Watson under contract 67527624eSRobert Watson * to Juniper Networks, Inc. 77527624eSRobert Watson * 87527624eSRobert Watson * Redistribution and use in source and binary forms, with or without 97527624eSRobert Watson * modification, are permitted provided that the following conditions 107527624eSRobert Watson * are met: 117527624eSRobert Watson * 1. Redistributions of source code must retain the above copyright 127527624eSRobert Watson * notice, this list of conditions and the following disclaimer. 137527624eSRobert Watson * 2. Redistributions in binary form must reproduce the above copyright 147527624eSRobert Watson * notice, this list of conditions and the following disclaimer in the 157527624eSRobert Watson * documentation and/or other materials provided with the distribution. 167527624eSRobert Watson * 177527624eSRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 187527624eSRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 197527624eSRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 207527624eSRobert Watson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 217527624eSRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 227527624eSRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 237527624eSRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 247527624eSRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 257527624eSRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 267527624eSRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 277527624eSRobert Watson * SUCH DAMAGE. 287527624eSRobert Watson */ 297527624eSRobert Watson 307527624eSRobert Watson #include <sys/cdefs.h> 317527624eSRobert Watson 327527624eSRobert Watson __FBSDID("$FreeBSD$"); 337527624eSRobert Watson 347527624eSRobert Watson #include "opt_inet6.h" 357527624eSRobert Watson #include "opt_pcbgroup.h" 367527624eSRobert Watson 377527624eSRobert Watson #ifndef PCBGROUP 387527624eSRobert Watson #error "options RSS depends on options PCBGROUP" 397527624eSRobert Watson #endif 407527624eSRobert Watson 417527624eSRobert Watson #include <sys/param.h> 427527624eSRobert Watson #include <sys/mbuf.h> 437527624eSRobert Watson #include <sys/socket.h> 447527624eSRobert Watson #include <sys/priv.h> 457527624eSRobert Watson #include <sys/kernel.h> 467527624eSRobert Watson #include <sys/smp.h> 477527624eSRobert Watson #include <sys/sysctl.h> 488bde802aSAdrian Chadd #include <sys/sbuf.h> 497527624eSRobert Watson 507527624eSRobert Watson #include <net/if.h> 517527624eSRobert Watson #include <net/if_var.h> 527527624eSRobert Watson #include <net/netisr.h> 537527624eSRobert Watson 547527624eSRobert Watson #include <netinet/in.h> 557527624eSRobert Watson #include <netinet/in_pcb.h> 567527624eSRobert Watson #include <netinet/in_rss.h> 577527624eSRobert Watson #include <netinet/in_var.h> 587527624eSRobert Watson #include <netinet/toeplitz.h> 597527624eSRobert Watson 6072d33245SAdrian Chadd /* for software rss hash support */ 6172d33245SAdrian Chadd #include <netinet/ip.h> 6272d33245SAdrian Chadd #include <netinet/tcp.h> 6372d33245SAdrian Chadd #include <netinet/udp.h> 6472d33245SAdrian Chadd 657527624eSRobert Watson /*- 667527624eSRobert Watson * Operating system parts of receiver-side scaling (RSS), which allows 677527624eSRobert Watson * network cards to direct flows to particular receive queues based on hashes 687527624eSRobert Watson * of header tuples. This implementation aligns RSS buckets with connection 697527624eSRobert Watson * groups at the TCP/IP layer, so each bucket is associated with exactly one 707527624eSRobert Watson * group. As a result, the group lookup structures (and lock) should have an 717527624eSRobert Watson * effective affinity with exactly one CPU. 727527624eSRobert Watson * 737527624eSRobert Watson * Network device drivers needing to configure RSS will query this framework 747527624eSRobert Watson * for parameters, such as the current RSS key, hashing policies, number of 757527624eSRobert Watson * bits, and indirection table mapping hashes to buckets and CPUs. They may 767527624eSRobert Watson * provide their own supplementary information, such as queue<->CPU bindings. 777527624eSRobert Watson * It is the responsibility of the network device driver to inject packets 787527624eSRobert Watson * into the stack on as close to the right CPU as possible, if playing by RSS 797527624eSRobert Watson * rules. 807527624eSRobert Watson * 817527624eSRobert Watson * TODO: 827527624eSRobert Watson * 837527624eSRobert Watson * - Synchronization for rss_key and other future-configurable parameters. 847527624eSRobert Watson * - Event handler drivers can register to pick up RSS configuration changes. 857527624eSRobert Watson * - Should we allow rss_basecpu to be configured? 867527624eSRobert Watson * - Randomize key on boot. 877527624eSRobert Watson * - IPv6 support. 887527624eSRobert Watson * - Statistics on how often there's a misalignment between hardware 897527624eSRobert Watson * placement and pcbgroup expectations. 907527624eSRobert Watson */ 917527624eSRobert Watson 927527624eSRobert Watson SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW, 0, "Receive-side steering"); 937527624eSRobert Watson 947527624eSRobert Watson /* 957527624eSRobert Watson * Toeplitz is the only required hash function in the RSS spec, so use it by 967527624eSRobert Watson * default. 977527624eSRobert Watson */ 987527624eSRobert Watson static u_int rss_hashalgo = RSS_HASH_TOEPLITZ; 99af3b2549SHans Petter Selasky SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RDTUN, &rss_hashalgo, 0, 1007527624eSRobert Watson "RSS hash algorithm"); 1017527624eSRobert Watson 1027527624eSRobert Watson /* 1037527624eSRobert Watson * Size of the indirection table; at most 128 entries per the RSS spec. We 1047527624eSRobert Watson * size it to at least 2 times the number of CPUs by default to allow useful 1057527624eSRobert Watson * rebalancing. If not set explicitly with a loader tunable, we tune based 1067527624eSRobert Watson * on the number of CPUs present. 1077527624eSRobert Watson * 1087527624eSRobert Watson * XXXRW: buckets might be better to use for the tunable than bits. 1097527624eSRobert Watson */ 1107527624eSRobert Watson static u_int rss_bits; 111af3b2549SHans Petter Selasky SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RDTUN, &rss_bits, 0, 1127527624eSRobert Watson "RSS bits"); 1137527624eSRobert Watson 1147527624eSRobert Watson static u_int rss_mask; 1157527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0, 1167527624eSRobert Watson "RSS mask"); 1177527624eSRobert Watson 1187527624eSRobert Watson static const u_int rss_maxbits = RSS_MAXBITS; 1197527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD, 1207527624eSRobert Watson __DECONST(int *, &rss_maxbits), 0, "RSS maximum bits"); 1217527624eSRobert Watson 1227527624eSRobert Watson /* 1237527624eSRobert Watson * RSS's own count of the number of CPUs it could be using for processing. 1247527624eSRobert Watson * Bounded to 64 by RSS constants. 1257527624eSRobert Watson */ 1267527624eSRobert Watson static u_int rss_ncpus; 1277527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0, 1287527624eSRobert Watson "Number of CPUs available to RSS"); 1297527624eSRobert Watson 1307527624eSRobert Watson #define RSS_MAXCPUS (1 << (RSS_MAXBITS - 1)) 1317527624eSRobert Watson static const u_int rss_maxcpus = RSS_MAXCPUS; 1327527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD, 1337527624eSRobert Watson __DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used"); 1347527624eSRobert Watson 1357527624eSRobert Watson /* 1367527624eSRobert Watson * Variable exists just for reporting rss_bits in a user-friendly way. 1377527624eSRobert Watson */ 1387527624eSRobert Watson static u_int rss_buckets; 1397527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0, 1407527624eSRobert Watson "RSS buckets"); 1417527624eSRobert Watson 1427527624eSRobert Watson /* 1437527624eSRobert Watson * Base CPU number; devices will add this to all CPU numbers returned by the 1447527624eSRobert Watson * RSS indirection table. Currently unmodifable in FreeBSD. 1457527624eSRobert Watson */ 1467527624eSRobert Watson static const u_int rss_basecpu; 1477527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD, 1487527624eSRobert Watson __DECONST(int *, &rss_basecpu), 0, "RSS base CPU"); 1497527624eSRobert Watson 1507527624eSRobert Watson /* 1517527624eSRobert Watson * RSS secret key, intended to prevent attacks on load-balancing. Its 1527527624eSRobert Watson * effectiveness may be limited by algorithm choice and available entropy 1537527624eSRobert Watson * during the boot. 1547527624eSRobert Watson * 1557527624eSRobert Watson * XXXRW: And that we don't randomize it yet! 1567527624eSRobert Watson * 15785415b47SAdrian Chadd * This is the default Microsoft RSS specification key which is also 15885415b47SAdrian Chadd * the Chelsio T5 firmware default key. 1597527624eSRobert Watson */ 1607527624eSRobert Watson static uint8_t rss_key[RSS_KEYSIZE] = { 16107b4e383SPeter Grehan 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 16207b4e383SPeter Grehan 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 16307b4e383SPeter Grehan 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 16407b4e383SPeter Grehan 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 16507b4e383SPeter Grehan 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, 1667527624eSRobert Watson }; 1677527624eSRobert Watson 1687527624eSRobert Watson /* 1697527624eSRobert Watson * RSS hash->CPU table, which maps hashed packet headers to particular CPUs. 1707527624eSRobert Watson * Drivers may supplement this table with a seperate CPU<->queue table when 1717527624eSRobert Watson * programming devices. 1727527624eSRobert Watson */ 1737527624eSRobert Watson struct rss_table_entry { 1747527624eSRobert Watson uint8_t rte_cpu; /* CPU affinity of bucket. */ 1757527624eSRobert Watson }; 1767527624eSRobert Watson static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN]; 1777527624eSRobert Watson 17872d33245SAdrian Chadd static inline u_int rss_gethashconfig_local(void); 17972d33245SAdrian Chadd 1807527624eSRobert Watson static void 1817527624eSRobert Watson rss_init(__unused void *arg) 1827527624eSRobert Watson { 1837527624eSRobert Watson u_int i; 184bad008ceSAdrian Chadd u_int cpuid; 1857527624eSRobert Watson 1867527624eSRobert Watson /* 1877527624eSRobert Watson * Validate tunables, coerce to sensible values. 1887527624eSRobert Watson */ 1897527624eSRobert Watson switch (rss_hashalgo) { 1907527624eSRobert Watson case RSS_HASH_TOEPLITZ: 1917527624eSRobert Watson case RSS_HASH_NAIVE: 1927527624eSRobert Watson break; 1937527624eSRobert Watson 1947527624eSRobert Watson default: 1957527624eSRobert Watson printf("%s: invalid RSS hashalgo %u, coercing to %u", 1967527624eSRobert Watson __func__, rss_hashalgo, RSS_HASH_TOEPLITZ); 1977527624eSRobert Watson rss_hashalgo = RSS_HASH_TOEPLITZ; 1987527624eSRobert Watson } 1997527624eSRobert Watson 2007527624eSRobert Watson /* 2017527624eSRobert Watson * Count available CPUs. 2027527624eSRobert Watson * 2037527624eSRobert Watson * XXXRW: Note incorrect assumptions regarding contiguity of this set 2047527624eSRobert Watson * elsewhere. 2057527624eSRobert Watson */ 2067527624eSRobert Watson rss_ncpus = 0; 2077527624eSRobert Watson for (i = 0; i <= mp_maxid; i++) { 2087527624eSRobert Watson if (CPU_ABSENT(i)) 2097527624eSRobert Watson continue; 2107527624eSRobert Watson rss_ncpus++; 2117527624eSRobert Watson } 2127527624eSRobert Watson if (rss_ncpus > RSS_MAXCPUS) 2137527624eSRobert Watson rss_ncpus = RSS_MAXCPUS; 2147527624eSRobert Watson 2157527624eSRobert Watson /* 2167527624eSRobert Watson * Tune RSS table entries to be no less than 2x the number of CPUs 2177527624eSRobert Watson * -- unless we're running uniprocessor, in which case there's not 2187527624eSRobert Watson * much point in having buckets to rearrange for load-balancing! 2197527624eSRobert Watson */ 2207527624eSRobert Watson if (rss_ncpus > 1) { 2217527624eSRobert Watson if (rss_bits == 0) 2227527624eSRobert Watson rss_bits = fls(rss_ncpus - 1) + 1; 2237527624eSRobert Watson 2247527624eSRobert Watson /* 2257527624eSRobert Watson * Microsoft limits RSS table entries to 128, so apply that 2267527624eSRobert Watson * limit to both auto-detected CPU counts and user-configured 2277527624eSRobert Watson * ones. 2287527624eSRobert Watson */ 2297527624eSRobert Watson if (rss_bits == 0 || rss_bits > RSS_MAXBITS) { 2307527624eSRobert Watson printf("%s: RSS bits %u not valid, coercing to %u", 2317527624eSRobert Watson __func__, rss_bits, RSS_MAXBITS); 2327527624eSRobert Watson rss_bits = RSS_MAXBITS; 2337527624eSRobert Watson } 2347527624eSRobert Watson 2357527624eSRobert Watson /* 2367527624eSRobert Watson * Figure out how many buckets to use; warn if less than the 2377527624eSRobert Watson * number of configured CPUs, although this is not a fatal 2387527624eSRobert Watson * problem. 2397527624eSRobert Watson */ 2407527624eSRobert Watson rss_buckets = (1 << rss_bits); 2417527624eSRobert Watson if (rss_buckets < rss_ncpus) 2427527624eSRobert Watson printf("%s: WARNING: rss_buckets (%u) less than " 2437527624eSRobert Watson "rss_ncpus (%u)\n", __func__, rss_buckets, 2447527624eSRobert Watson rss_ncpus); 2457527624eSRobert Watson rss_mask = rss_buckets - 1; 2467527624eSRobert Watson } else { 2477527624eSRobert Watson rss_bits = 0; 2487527624eSRobert Watson rss_buckets = 1; 2497527624eSRobert Watson rss_mask = 0; 2507527624eSRobert Watson } 2517527624eSRobert Watson 2527527624eSRobert Watson /* 2537527624eSRobert Watson * Set up initial CPU assignments: round-robin by default. 2547527624eSRobert Watson */ 255bad008ceSAdrian Chadd cpuid = CPU_FIRST(); 256bad008ceSAdrian Chadd for (i = 0; i < rss_buckets; i++) { 257bad008ceSAdrian Chadd rss_table[i].rte_cpu = cpuid; 258bad008ceSAdrian Chadd cpuid = CPU_NEXT(cpuid); 259bad008ceSAdrian Chadd } 2607527624eSRobert Watson 2617527624eSRobert Watson /* 2627527624eSRobert Watson * Randomize rrs_key. 2637527624eSRobert Watson * 2647527624eSRobert Watson * XXXRW: Not yet. If nothing else, will require an rss_isbadkey() 2657527624eSRobert Watson * loop to check for "bad" RSS keys. 2667527624eSRobert Watson */ 2677527624eSRobert Watson } 2687527624eSRobert Watson SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL); 2697527624eSRobert Watson 2707527624eSRobert Watson static uint32_t 2717527624eSRobert Watson rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen, 2727527624eSRobert Watson const uint8_t *data) 2737527624eSRobert Watson { 2747527624eSRobert Watson uint32_t v; 2757527624eSRobert Watson u_int i; 2767527624eSRobert Watson 2777527624eSRobert Watson v = 0; 2787527624eSRobert Watson for (i = 0; i < keylen; i++) 2797527624eSRobert Watson v += key[i]; 2807527624eSRobert Watson for (i = 0; i < datalen; i++) 2817527624eSRobert Watson v += data[i]; 2827527624eSRobert Watson return (v); 2837527624eSRobert Watson } 2847527624eSRobert Watson 2857527624eSRobert Watson static uint32_t 2867527624eSRobert Watson rss_hash(u_int datalen, const uint8_t *data) 2877527624eSRobert Watson { 2887527624eSRobert Watson 2897527624eSRobert Watson switch (rss_hashalgo) { 2907527624eSRobert Watson case RSS_HASH_TOEPLITZ: 2917527624eSRobert Watson return (toeplitz_hash(sizeof(rss_key), rss_key, datalen, 2927527624eSRobert Watson data)); 2937527624eSRobert Watson 2947527624eSRobert Watson case RSS_HASH_NAIVE: 2957527624eSRobert Watson return (rss_naive_hash(sizeof(rss_key), rss_key, datalen, 2967527624eSRobert Watson data)); 2977527624eSRobert Watson 2987527624eSRobert Watson default: 2997527624eSRobert Watson panic("%s: unsupported/unknown hashalgo %d", __func__, 3007527624eSRobert Watson rss_hashalgo); 3017527624eSRobert Watson } 3027527624eSRobert Watson } 3037527624eSRobert Watson 3047527624eSRobert Watson /* 3057527624eSRobert Watson * Hash an IPv4 2-tuple. 3067527624eSRobert Watson */ 3077527624eSRobert Watson uint32_t 3087527624eSRobert Watson rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst) 3097527624eSRobert Watson { 3107527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 3117527624eSRobert Watson u_int datalen; 3127527624eSRobert Watson 3137527624eSRobert Watson datalen = 0; 3147527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3157527624eSRobert Watson datalen += sizeof(src); 3167527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3177527624eSRobert Watson datalen += sizeof(dst); 3187527624eSRobert Watson return (rss_hash(datalen, data)); 3197527624eSRobert Watson } 3207527624eSRobert Watson 3217527624eSRobert Watson /* 3227527624eSRobert Watson * Hash an IPv4 4-tuple. 3237527624eSRobert Watson */ 3247527624eSRobert Watson uint32_t 3257527624eSRobert Watson rss_hash_ip4_4tuple(struct in_addr src, u_short srcport, struct in_addr dst, 3267527624eSRobert Watson u_short dstport) 3277527624eSRobert Watson { 3287527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 3297527624eSRobert Watson sizeof(dstport)]; 3307527624eSRobert Watson u_int datalen; 3317527624eSRobert Watson 3327527624eSRobert Watson datalen = 0; 3337527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3347527624eSRobert Watson datalen += sizeof(src); 3357527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3367527624eSRobert Watson datalen += sizeof(dst); 3377527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 3387527624eSRobert Watson datalen += sizeof(srcport); 3397527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 3407527624eSRobert Watson datalen += sizeof(dstport); 3417527624eSRobert Watson return (rss_hash(datalen, data)); 3427527624eSRobert Watson } 3437527624eSRobert Watson 3447527624eSRobert Watson #ifdef INET6 3457527624eSRobert Watson /* 3467527624eSRobert Watson * Hash an IPv6 2-tuple. 3477527624eSRobert Watson */ 3487527624eSRobert Watson uint32_t 3497527624eSRobert Watson rss_hash_ip6_2tuple(struct in6_addr src, struct in6_addr dst) 3507527624eSRobert Watson { 3517527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 3527527624eSRobert Watson u_int datalen; 3537527624eSRobert Watson 3547527624eSRobert Watson datalen = 0; 3557527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3567527624eSRobert Watson datalen += sizeof(src); 3577527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3587527624eSRobert Watson datalen += sizeof(dst); 3597527624eSRobert Watson return (rss_hash(datalen, data)); 3607527624eSRobert Watson } 3617527624eSRobert Watson 3627527624eSRobert Watson /* 3637527624eSRobert Watson * Hash an IPv6 4-tuple. 3647527624eSRobert Watson */ 3657527624eSRobert Watson uint32_t 3667527624eSRobert Watson rss_hash_ip6_4tuple(struct in6_addr src, u_short srcport, 3677527624eSRobert Watson struct in6_addr dst, u_short dstport) 3687527624eSRobert Watson { 3697527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 3707527624eSRobert Watson sizeof(dstport)]; 3717527624eSRobert Watson u_int datalen; 3727527624eSRobert Watson 3737527624eSRobert Watson datalen = 0; 3747527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3757527624eSRobert Watson datalen += sizeof(src); 3767527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3777527624eSRobert Watson datalen += sizeof(dst); 3787527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 3797527624eSRobert Watson datalen += sizeof(srcport); 3807527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 3817527624eSRobert Watson datalen += sizeof(dstport); 3827527624eSRobert Watson return (rss_hash(datalen, data)); 3837527624eSRobert Watson } 3847527624eSRobert Watson #endif /* INET6 */ 3857527624eSRobert Watson 3867527624eSRobert Watson /* 3877527624eSRobert Watson * Query the number of RSS bits in use. 3887527624eSRobert Watson */ 3897527624eSRobert Watson u_int 3907527624eSRobert Watson rss_getbits(void) 3917527624eSRobert Watson { 3927527624eSRobert Watson 3937527624eSRobert Watson return (rss_bits); 3947527624eSRobert Watson } 3957527624eSRobert Watson 3967527624eSRobert Watson /* 3977527624eSRobert Watson * Query the RSS bucket associated with an RSS hash. 3987527624eSRobert Watson */ 3997527624eSRobert Watson u_int 4007527624eSRobert Watson rss_getbucket(u_int hash) 4017527624eSRobert Watson { 4027527624eSRobert Watson 4037527624eSRobert Watson return (hash & rss_mask); 4047527624eSRobert Watson } 4057527624eSRobert Watson 4067527624eSRobert Watson /* 407a6c88ec4SAdrian Chadd * Query the RSS layer bucket associated with the given 408a6c88ec4SAdrian Chadd * entry in the RSS hash space. 409a6c88ec4SAdrian Chadd * 410a6c88ec4SAdrian Chadd * The RSS indirection table is 0 .. rss_buckets-1, 411a6c88ec4SAdrian Chadd * covering the low 'rss_bits' of the total 128 slot 412a6c88ec4SAdrian Chadd * RSS indirection table. So just mask off rss_bits and 413a6c88ec4SAdrian Chadd * return that. 414a6c88ec4SAdrian Chadd * 415a6c88ec4SAdrian Chadd * NIC drivers can then iterate over the 128 slot RSS 416a6c88ec4SAdrian Chadd * indirection table and fetch which RSS bucket to 417a6c88ec4SAdrian Chadd * map it to. This will typically be a CPU queue 418a6c88ec4SAdrian Chadd */ 419a6c88ec4SAdrian Chadd u_int 420a6c88ec4SAdrian Chadd rss_get_indirection_to_bucket(u_int index) 421a6c88ec4SAdrian Chadd { 422a6c88ec4SAdrian Chadd 423a6c88ec4SAdrian Chadd return (index & rss_mask); 424a6c88ec4SAdrian Chadd } 425a6c88ec4SAdrian Chadd 426a6c88ec4SAdrian Chadd /* 4277527624eSRobert Watson * Query the RSS CPU associated with an RSS bucket. 4287527624eSRobert Watson */ 4297527624eSRobert Watson u_int 4307527624eSRobert Watson rss_getcpu(u_int bucket) 4317527624eSRobert Watson { 4327527624eSRobert Watson 4337527624eSRobert Watson return (rss_table[bucket].rte_cpu); 4347527624eSRobert Watson } 4357527624eSRobert Watson 4367527624eSRobert Watson /* 437cc6c1877SAdrian Chadd * netisr CPU affinity lookup given just the hash and hashtype. 438cc6c1877SAdrian Chadd */ 439cc6c1877SAdrian Chadd u_int 440cc6c1877SAdrian Chadd rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type) 441cc6c1877SAdrian Chadd { 442cc6c1877SAdrian Chadd 443cc6c1877SAdrian Chadd switch (hash_type) { 444cc6c1877SAdrian Chadd case M_HASHTYPE_RSS_IPV4: 445cc6c1877SAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV4: 4469870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV4: 447e989b65fSAdrian Chadd case M_HASHTYPE_RSS_IPV6: 448e989b65fSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV6: 4499870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV6: 450cc6c1877SAdrian Chadd return (rss_getcpu(rss_getbucket(hash_val))); 451cc6c1877SAdrian Chadd default: 452cc6c1877SAdrian Chadd return (NETISR_CPUID_NONE); 453cc6c1877SAdrian Chadd } 454cc6c1877SAdrian Chadd } 455cc6c1877SAdrian Chadd 456cc6c1877SAdrian Chadd /* 4578bde802aSAdrian Chadd * Query the RSS bucket associated with the given hash value and 4588bde802aSAdrian Chadd * type. 4598bde802aSAdrian Chadd */ 4608bde802aSAdrian Chadd int 4618bde802aSAdrian Chadd rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, uint32_t *bucket_id) 4628bde802aSAdrian Chadd { 4638bde802aSAdrian Chadd 4648bde802aSAdrian Chadd switch (hash_type) { 4658bde802aSAdrian Chadd case M_HASHTYPE_RSS_IPV4: 4668bde802aSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV4: 4679870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV4: 468e989b65fSAdrian Chadd case M_HASHTYPE_RSS_IPV6: 469e989b65fSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV6: 4709870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV6: 4718bde802aSAdrian Chadd *bucket_id = rss_getbucket(hash_val); 4728bde802aSAdrian Chadd return (0); 4738bde802aSAdrian Chadd default: 4748bde802aSAdrian Chadd return (-1); 4758bde802aSAdrian Chadd } 4768bde802aSAdrian Chadd } 4778bde802aSAdrian Chadd 4788bde802aSAdrian Chadd /* 4797527624eSRobert Watson * netisr CPU affinity lookup routine for use by protocols. 4807527624eSRobert Watson */ 4817527624eSRobert Watson struct mbuf * 4827527624eSRobert Watson rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 4837527624eSRobert Watson { 4847527624eSRobert Watson 4857527624eSRobert Watson M_ASSERTPKTHDR(m); 486cc6c1877SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 4877527624eSRobert Watson return (m); 4887527624eSRobert Watson } 4897527624eSRobert Watson 4908bde802aSAdrian Chadd int 4918bde802aSAdrian Chadd rss_m2bucket(struct mbuf *m, uint32_t *bucket_id) 4928bde802aSAdrian Chadd { 4938bde802aSAdrian Chadd 4948bde802aSAdrian Chadd M_ASSERTPKTHDR(m); 4958bde802aSAdrian Chadd 4968bde802aSAdrian Chadd return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 4978bde802aSAdrian Chadd bucket_id)); 4988bde802aSAdrian Chadd } 4998bde802aSAdrian Chadd 5007527624eSRobert Watson /* 50172d33245SAdrian Chadd * Calculate an appropriate ipv4 2-tuple or 4-tuple given the given 50272d33245SAdrian Chadd * IPv4 source/destination address, UDP or TCP source/destination ports 50372d33245SAdrian Chadd * and the protocol type. 50472d33245SAdrian Chadd * 50572d33245SAdrian Chadd * The protocol code may wish to do a software hash of the given 50672d33245SAdrian Chadd * tuple. This depends upon the currently configured RSS hash types. 50772d33245SAdrian Chadd * 50872d33245SAdrian Chadd * This assumes that the packet in question isn't a fragment. 50972d33245SAdrian Chadd * 51072d33245SAdrian Chadd * It also assumes the packet source/destination address 51172d33245SAdrian Chadd * are in "incoming" packet order (ie, source is "far" address.) 51272d33245SAdrian Chadd */ 51372d33245SAdrian Chadd int 51472d33245SAdrian Chadd rss_proto_software_hash_v4(struct in_addr s, struct in_addr d, 51572d33245SAdrian Chadd u_short sp, u_short dp, int proto, 51672d33245SAdrian Chadd uint32_t *hashval, uint32_t *hashtype) 51772d33245SAdrian Chadd { 51872d33245SAdrian Chadd uint32_t hash; 51972d33245SAdrian Chadd 52072d33245SAdrian Chadd /* 52172d33245SAdrian Chadd * Next, choose the hash type depending upon the protocol 52272d33245SAdrian Chadd * identifier. 52372d33245SAdrian Chadd */ 52472d33245SAdrian Chadd if ((proto == IPPROTO_TCP) && 52572d33245SAdrian Chadd (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4)) { 52672d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 52772d33245SAdrian Chadd *hashval = hash; 52872d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_TCP_IPV4; 52972d33245SAdrian Chadd return (0); 53072d33245SAdrian Chadd } else if ((proto == IPPROTO_UDP) && 53172d33245SAdrian Chadd (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4)) { 53272d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 53372d33245SAdrian Chadd *hashval = hash; 53472d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_UDP_IPV4; 53572d33245SAdrian Chadd return (0); 53672d33245SAdrian Chadd } else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) { 53772d33245SAdrian Chadd /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ 53872d33245SAdrian Chadd hash = rss_hash_ip4_2tuple(s, d); 53972d33245SAdrian Chadd *hashval = hash; 54072d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_IPV4; 54172d33245SAdrian Chadd return (0); 54272d33245SAdrian Chadd } 54372d33245SAdrian Chadd 54472d33245SAdrian Chadd /* No configured available hashtypes! */ 54572d33245SAdrian Chadd printf("%s: no available hashtypes!\n", __func__); 54672d33245SAdrian Chadd return (-1); 54772d33245SAdrian Chadd } 54872d33245SAdrian Chadd 54972d33245SAdrian Chadd /* 55072d33245SAdrian Chadd * Do a software calculation of the RSS for the given mbuf. 55172d33245SAdrian Chadd * 55272d33245SAdrian Chadd * This is typically used by the input path to recalculate the RSS after 55372d33245SAdrian Chadd * some form of packet processing (eg de-capsulation, IP fragment reassembly.) 55472d33245SAdrian Chadd * 55572d33245SAdrian Chadd * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and 55672d33245SAdrian Chadd * RSS_HASH_PKT_EGRESS for outgoing. 55772d33245SAdrian Chadd * 55872d33245SAdrian Chadd * Returns 0 if a hash was done, -1 if no hash was done, +1 if 55972d33245SAdrian Chadd * the mbuf already had a valid RSS flowid. 56072d33245SAdrian Chadd * 56172d33245SAdrian Chadd * This function doesn't modify the mbuf. It's up to the caller to 56272d33245SAdrian Chadd * assign flowid/flowtype as appropriate. 56372d33245SAdrian Chadd */ 56472d33245SAdrian Chadd int 56572d33245SAdrian Chadd rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval, 56672d33245SAdrian Chadd uint32_t *hashtype) 56772d33245SAdrian Chadd { 56872d33245SAdrian Chadd const struct ip *ip; 56972d33245SAdrian Chadd const struct tcphdr *th; 57072d33245SAdrian Chadd const struct udphdr *uh; 571*c2529042SHans Petter Selasky uint32_t flowid; 572*c2529042SHans Petter Selasky uint32_t flowtype; 57372d33245SAdrian Chadd uint8_t proto; 57472d33245SAdrian Chadd int iphlen; 57572d33245SAdrian Chadd int is_frag = 0; 57672d33245SAdrian Chadd 57772d33245SAdrian Chadd /* 57872d33245SAdrian Chadd * XXX For now this only handles hashing on incoming mbufs. 57972d33245SAdrian Chadd */ 58072d33245SAdrian Chadd if (dir != RSS_HASH_PKT_INGRESS) { 58172d33245SAdrian Chadd printf("%s: called on EGRESS packet!\n", __func__); 58272d33245SAdrian Chadd return (-1); 58372d33245SAdrian Chadd } 58472d33245SAdrian Chadd 58572d33245SAdrian Chadd /* 58672d33245SAdrian Chadd * First, validate that the mbuf we have is long enough 58772d33245SAdrian Chadd * to have an IPv4 header in it. 58872d33245SAdrian Chadd */ 58972d33245SAdrian Chadd if (m->m_pkthdr.len < (sizeof(struct ip))) { 59072d33245SAdrian Chadd printf("%s: short mbuf pkthdr\n", __func__); 59172d33245SAdrian Chadd return (-1); 59272d33245SAdrian Chadd } 59372d33245SAdrian Chadd if (m->m_len < (sizeof(struct ip))) { 59472d33245SAdrian Chadd printf("%s: short mbuf len\n", __func__); 59572d33245SAdrian Chadd return (-1); 59672d33245SAdrian Chadd } 59772d33245SAdrian Chadd 59872d33245SAdrian Chadd /* Ok, let's dereference that */ 59972d33245SAdrian Chadd ip = mtod(m, struct ip *); 60072d33245SAdrian Chadd proto = ip->ip_p; 60172d33245SAdrian Chadd iphlen = ip->ip_hl << 2; 60272d33245SAdrian Chadd 60372d33245SAdrian Chadd /* 60472d33245SAdrian Chadd * If this is a fragment then it shouldn't be four-tuple 60572d33245SAdrian Chadd * hashed just yet. Once it's reassembled into a full 60672d33245SAdrian Chadd * frame it should be re-hashed. 60772d33245SAdrian Chadd */ 60872d33245SAdrian Chadd if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) 60972d33245SAdrian Chadd is_frag = 1; 61072d33245SAdrian Chadd 61172d33245SAdrian Chadd /* 61272d33245SAdrian Chadd * If the mbuf flowid/flowtype matches the packet type, 61372d33245SAdrian Chadd * and we don't support the 4-tuple version of the given protocol, 61472d33245SAdrian Chadd * then signal to the owner that it can trust the flowid/flowtype 61572d33245SAdrian Chadd * details. 61672d33245SAdrian Chadd * 61772d33245SAdrian Chadd * This is a little picky - eg, if TCPv4 / UDPv4 hashing 61872d33245SAdrian Chadd * is supported but we got a TCP/UDP frame only 2-tuple hashed, 61972d33245SAdrian Chadd * then we shouldn't just "trust" the 2-tuple hash. We need 62072d33245SAdrian Chadd * a 4-tuple hash. 62172d33245SAdrian Chadd */ 62272d33245SAdrian Chadd flowid = m->m_pkthdr.flowid; 62372d33245SAdrian Chadd flowtype = M_HASHTYPE_GET(m); 62472d33245SAdrian Chadd 625*c2529042SHans Petter Selasky if (flowtype != M_HASHTYPE_NONE) { 62672d33245SAdrian Chadd switch (proto) { 62772d33245SAdrian Chadd case IPPROTO_UDP: 62872d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) && 62972d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_UDP_IPV4) && 63072d33245SAdrian Chadd (is_frag == 0)) { 63172d33245SAdrian Chadd return (1); 63272d33245SAdrian Chadd } 63372d33245SAdrian Chadd /* 63472d33245SAdrian Chadd * Only allow 2-tuple for UDP frames if we don't also 63572d33245SAdrian Chadd * support 4-tuple for UDP. 63672d33245SAdrian Chadd */ 63772d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 63872d33245SAdrian Chadd ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) == 0) && 63972d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 64072d33245SAdrian Chadd return (1); 64172d33245SAdrian Chadd } 64272d33245SAdrian Chadd break; 64372d33245SAdrian Chadd case IPPROTO_TCP: 64472d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) && 64572d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_TCP_IPV4) && 64672d33245SAdrian Chadd (is_frag == 0)) { 64772d33245SAdrian Chadd return (1); 64872d33245SAdrian Chadd } 64972d33245SAdrian Chadd /* 65072d33245SAdrian Chadd * Only allow 2-tuple for TCP frames if we don't also 65172d33245SAdrian Chadd * support 2-tuple for TCP. 65272d33245SAdrian Chadd */ 65372d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 65472d33245SAdrian Chadd ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) == 0) && 65572d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 65672d33245SAdrian Chadd return (1); 65772d33245SAdrian Chadd } 65872d33245SAdrian Chadd break; 65972d33245SAdrian Chadd default: 66072d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 66172d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 66272d33245SAdrian Chadd return (1); 66372d33245SAdrian Chadd } 66472d33245SAdrian Chadd break; 66572d33245SAdrian Chadd } 66672d33245SAdrian Chadd } 66772d33245SAdrian Chadd 66872d33245SAdrian Chadd /* 66972d33245SAdrian Chadd * Decode enough information to make a hash decision. 67072d33245SAdrian Chadd * 67172d33245SAdrian Chadd * XXX TODO: does the hardware hash on 4-tuple if IP 67272d33245SAdrian Chadd * options are present? 67372d33245SAdrian Chadd */ 674f4659f4cSAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) && 675f4659f4cSAdrian Chadd (proto == IPPROTO_TCP) && 676f4659f4cSAdrian Chadd (is_frag == 0)) { 67772d33245SAdrian Chadd if (m->m_len < iphlen + sizeof(struct tcphdr)) { 67872d33245SAdrian Chadd printf("%s: short TCP frame?\n", __func__); 67972d33245SAdrian Chadd return (-1); 68072d33245SAdrian Chadd } 68172d33245SAdrian Chadd th = (struct tcphdr *)((caddr_t)ip + iphlen); 68272d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 68372d33245SAdrian Chadd th->th_sport, 68472d33245SAdrian Chadd th->th_dport, 68572d33245SAdrian Chadd proto, 68672d33245SAdrian Chadd hashval, 68772d33245SAdrian Chadd hashtype); 688f4659f4cSAdrian Chadd } else if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) && 689f4659f4cSAdrian Chadd (proto == IPPROTO_UDP) && 690f4659f4cSAdrian Chadd (is_frag == 0)) { 69172d33245SAdrian Chadd uh = (struct udphdr *)((caddr_t)ip + iphlen); 69272d33245SAdrian Chadd if (m->m_len < iphlen + sizeof(struct udphdr)) { 69372d33245SAdrian Chadd printf("%s: short UDP frame?\n", __func__); 69472d33245SAdrian Chadd return (-1); 69572d33245SAdrian Chadd } 69672d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 69772d33245SAdrian Chadd uh->uh_sport, 69872d33245SAdrian Chadd uh->uh_dport, 69972d33245SAdrian Chadd proto, 70072d33245SAdrian Chadd hashval, 70172d33245SAdrian Chadd hashtype); 702f4659f4cSAdrian Chadd } else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) { 70372d33245SAdrian Chadd /* Default to 2-tuple hash */ 70472d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 70572d33245SAdrian Chadd 0, /* source port */ 70672d33245SAdrian Chadd 0, /* destination port */ 70772d33245SAdrian Chadd 0, /* IPPROTO_IP */ 70872d33245SAdrian Chadd hashval, 70972d33245SAdrian Chadd hashtype); 710f4659f4cSAdrian Chadd } else { 711f4659f4cSAdrian Chadd printf("%s: no available hashtypes!\n", __func__); 712f4659f4cSAdrian Chadd return (-1); 71372d33245SAdrian Chadd } 71472d33245SAdrian Chadd } 71572d33245SAdrian Chadd 71672d33245SAdrian Chadd /* 71772d33245SAdrian Chadd * Similar to rss_m2cpuid, but designed to be used by the IP NETISR 71872d33245SAdrian Chadd * on incoming frames. 71972d33245SAdrian Chadd * 72072d33245SAdrian Chadd * If an existing RSS hash exists and it matches what the configured 72172d33245SAdrian Chadd * hashing is, then use it. 72272d33245SAdrian Chadd * 72372d33245SAdrian Chadd * If there's an existing RSS hash but the desired hash is different, 72472d33245SAdrian Chadd * or if there's no useful RSS hash, then calculate it via 72572d33245SAdrian Chadd * the software path. 72672d33245SAdrian Chadd * 72772d33245SAdrian Chadd * XXX TODO: definitely want statistics here! 72872d33245SAdrian Chadd */ 72972d33245SAdrian Chadd struct mbuf * 73072d33245SAdrian Chadd rss_soft_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 73172d33245SAdrian Chadd { 73272d33245SAdrian Chadd uint32_t hash_val, hash_type; 73372d33245SAdrian Chadd int ret; 73472d33245SAdrian Chadd 73572d33245SAdrian Chadd M_ASSERTPKTHDR(m); 73672d33245SAdrian Chadd 73772d33245SAdrian Chadd ret = rss_mbuf_software_hash_v4(m, RSS_HASH_PKT_INGRESS, 73872d33245SAdrian Chadd &hash_val, &hash_type); 73972d33245SAdrian Chadd if (ret > 0) { 74072d33245SAdrian Chadd /* mbuf has a valid hash already; don't need to modify it */ 74172d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 74272d33245SAdrian Chadd } else if (ret == 0) { 74372d33245SAdrian Chadd /* hash was done; update */ 74472d33245SAdrian Chadd m->m_pkthdr.flowid = hash_val; 74572d33245SAdrian Chadd M_HASHTYPE_SET(m, hash_type); 74672d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 74772d33245SAdrian Chadd } else { /* ret < 0 */ 74872d33245SAdrian Chadd /* no hash was done */ 74972d33245SAdrian Chadd *cpuid = NETISR_CPUID_NONE; 75072d33245SAdrian Chadd } 75172d33245SAdrian Chadd return (m); 75272d33245SAdrian Chadd } 75372d33245SAdrian Chadd 75472d33245SAdrian Chadd /* 7557527624eSRobert Watson * Query the RSS hash algorithm. 7567527624eSRobert Watson */ 7577527624eSRobert Watson u_int 7587527624eSRobert Watson rss_gethashalgo(void) 7597527624eSRobert Watson { 7607527624eSRobert Watson 7617527624eSRobert Watson return (rss_hashalgo); 7627527624eSRobert Watson } 7637527624eSRobert Watson 7647527624eSRobert Watson /* 7657527624eSRobert Watson * Query the current RSS key; likely to be used by device drivers when 7667527624eSRobert Watson * configuring hardware RSS. Caller must pass an array of size RSS_KEYSIZE. 7677527624eSRobert Watson * 7687527624eSRobert Watson * XXXRW: Perhaps we should do the accept-a-length-and-truncate thing? 7697527624eSRobert Watson */ 7707527624eSRobert Watson void 7717527624eSRobert Watson rss_getkey(uint8_t *key) 7727527624eSRobert Watson { 7737527624eSRobert Watson 7747527624eSRobert Watson bcopy(rss_key, key, sizeof(rss_key)); 7757527624eSRobert Watson } 7767527624eSRobert Watson 7777527624eSRobert Watson /* 7787527624eSRobert Watson * Query the number of buckets; this may be used by both network device 7797527624eSRobert Watson * drivers, which will need to populate hardware shadows of the software 7807527624eSRobert Watson * indirection table, and the network stack itself (such as when deciding how 7817527624eSRobert Watson * many connection groups to allocate). 7827527624eSRobert Watson */ 7837527624eSRobert Watson u_int 7847527624eSRobert Watson rss_getnumbuckets(void) 7857527624eSRobert Watson { 7867527624eSRobert Watson 7877527624eSRobert Watson return (rss_buckets); 7887527624eSRobert Watson } 7897527624eSRobert Watson 7907527624eSRobert Watson /* 7917527624eSRobert Watson * Query the number of CPUs in use by RSS; may be useful to device drivers 7927527624eSRobert Watson * trying to figure out how to map a larger number of CPUs into a smaller 7937527624eSRobert Watson * number of receive queues. 7947527624eSRobert Watson */ 7957527624eSRobert Watson u_int 7967527624eSRobert Watson rss_getnumcpus(void) 7977527624eSRobert Watson { 7987527624eSRobert Watson 7997527624eSRobert Watson return (rss_ncpus); 8007527624eSRobert Watson } 8017527624eSRobert Watson 80272d33245SAdrian Chadd static inline u_int 80372d33245SAdrian Chadd rss_gethashconfig_local(void) 80440c753e3SAdrian Chadd { 80572d33245SAdrian Chadd 80640c753e3SAdrian Chadd /* Return 4-tuple for TCP; 2-tuple for others */ 80740c753e3SAdrian Chadd /* 80840c753e3SAdrian Chadd * UDP may fragment more often than TCP and thus we'll end up with 80940c753e3SAdrian Chadd * NICs returning 2-tuple fragments. 81040c753e3SAdrian Chadd * udp_init() and udplite_init() both currently initialise things 81140c753e3SAdrian Chadd * as 2-tuple. 81240c753e3SAdrian Chadd * So for now disable UDP 4-tuple hashing until all of the other 81340c753e3SAdrian Chadd * pieces are in place. 81440c753e3SAdrian Chadd */ 81540c753e3SAdrian Chadd return ( 81640c753e3SAdrian Chadd RSS_HASHTYPE_RSS_IPV4 81740c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV4 81840c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_IPV6 81940c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV6 82040c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_IPV6_EX 82140c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV6_EX 82240c753e3SAdrian Chadd #if 0 82340c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV4 82440c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV4_EX 82540c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV6 82640c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV6_EX 82740c753e3SAdrian Chadd #endif 82840c753e3SAdrian Chadd ); 82940c753e3SAdrian Chadd } 83040c753e3SAdrian Chadd 83140c753e3SAdrian Chadd /* 83272d33245SAdrian Chadd * Return the supported RSS hash configuration. 83372d33245SAdrian Chadd * 83472d33245SAdrian Chadd * NICs should query this to determine what to configure in their redirection 83572d33245SAdrian Chadd * matching table. 83672d33245SAdrian Chadd */ 83772d33245SAdrian Chadd u_int 83872d33245SAdrian Chadd rss_gethashconfig(void) 83972d33245SAdrian Chadd { 84072d33245SAdrian Chadd 84172d33245SAdrian Chadd return (rss_gethashconfig_local()); 84272d33245SAdrian Chadd } 84372d33245SAdrian Chadd 84472d33245SAdrian Chadd /* 8457527624eSRobert Watson * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want 8467527624eSRobert Watson * it appearing in debugging output unnecessarily. 8477527624eSRobert Watson */ 8487527624eSRobert Watson static int 8497527624eSRobert Watson sysctl_rss_key(SYSCTL_HANDLER_ARGS) 8507527624eSRobert Watson { 8517527624eSRobert Watson uint8_t temp_rss_key[RSS_KEYSIZE]; 8527527624eSRobert Watson int error; 8537527624eSRobert Watson 8547527624eSRobert Watson error = priv_check(req->td, PRIV_NETINET_HASHKEY); 8557527624eSRobert Watson if (error) 8567527624eSRobert Watson return (error); 8577527624eSRobert Watson 8587527624eSRobert Watson bcopy(rss_key, temp_rss_key, sizeof(temp_rss_key)); 8597527624eSRobert Watson error = sysctl_handle_opaque(oidp, temp_rss_key, 8607527624eSRobert Watson sizeof(temp_rss_key), req); 8617527624eSRobert Watson if (error) 8627527624eSRobert Watson return (error); 8637527624eSRobert Watson if (req->newptr != NULL) { 8647527624eSRobert Watson /* XXXRW: Not yet. */ 8657527624eSRobert Watson return (EINVAL); 8667527624eSRobert Watson } 8677527624eSRobert Watson return (0); 8687527624eSRobert Watson } 8697527624eSRobert Watson SYSCTL_PROC(_net_inet_rss, OID_AUTO, key, 8707527624eSRobert Watson CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key, 8717527624eSRobert Watson "", "RSS keying material"); 8728bde802aSAdrian Chadd 8738bde802aSAdrian Chadd static int 8748bde802aSAdrian Chadd sysctl_rss_bucket_mapping(SYSCTL_HANDLER_ARGS) 8758bde802aSAdrian Chadd { 8768bde802aSAdrian Chadd struct sbuf *sb; 8778bde802aSAdrian Chadd int error; 8788bde802aSAdrian Chadd int i; 8798bde802aSAdrian Chadd 8808bde802aSAdrian Chadd error = 0; 8818bde802aSAdrian Chadd error = sysctl_wire_old_buffer(req, 0); 8828bde802aSAdrian Chadd if (error != 0) 8838bde802aSAdrian Chadd return (error); 8848bde802aSAdrian Chadd sb = sbuf_new_for_sysctl(NULL, NULL, 512, req); 8858bde802aSAdrian Chadd if (sb == NULL) 8868bde802aSAdrian Chadd return (ENOMEM); 8878bde802aSAdrian Chadd for (i = 0; i < rss_buckets; i++) { 8888bde802aSAdrian Chadd sbuf_printf(sb, "%s%d:%d", i == 0 ? "" : " ", 8898bde802aSAdrian Chadd i, 8908bde802aSAdrian Chadd rss_getcpu(i)); 8918bde802aSAdrian Chadd } 8928bde802aSAdrian Chadd error = sbuf_finish(sb); 8938bde802aSAdrian Chadd sbuf_delete(sb); 8948bde802aSAdrian Chadd 8958bde802aSAdrian Chadd return (error); 8968bde802aSAdrian Chadd } 8978bde802aSAdrian Chadd SYSCTL_PROC(_net_inet_rss, OID_AUTO, bucket_mapping, 8988bde802aSAdrian Chadd CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 8998bde802aSAdrian Chadd sysctl_rss_bucket_mapping, "", "RSS bucket -> CPU mapping"); 900