17527624eSRobert Watson /*- 27527624eSRobert Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. 37527624eSRobert Watson * All rights reserved. 47527624eSRobert Watson * 57527624eSRobert Watson * This software was developed by Robert N. M. Watson under contract 67527624eSRobert Watson * to Juniper Networks, Inc. 77527624eSRobert Watson * 87527624eSRobert Watson * Redistribution and use in source and binary forms, with or without 97527624eSRobert Watson * modification, are permitted provided that the following conditions 107527624eSRobert Watson * are met: 117527624eSRobert Watson * 1. Redistributions of source code must retain the above copyright 127527624eSRobert Watson * notice, this list of conditions and the following disclaimer. 137527624eSRobert Watson * 2. Redistributions in binary form must reproduce the above copyright 147527624eSRobert Watson * notice, this list of conditions and the following disclaimer in the 157527624eSRobert Watson * documentation and/or other materials provided with the distribution. 167527624eSRobert Watson * 177527624eSRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 187527624eSRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 197527624eSRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 207527624eSRobert Watson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 217527624eSRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 227527624eSRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 237527624eSRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 247527624eSRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 257527624eSRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 267527624eSRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 277527624eSRobert Watson * SUCH DAMAGE. 287527624eSRobert Watson */ 297527624eSRobert Watson 307527624eSRobert Watson #include <sys/cdefs.h> 317527624eSRobert Watson 327527624eSRobert Watson __FBSDID("$FreeBSD$"); 337527624eSRobert Watson 347527624eSRobert Watson #include "opt_inet6.h" 357527624eSRobert Watson #include "opt_pcbgroup.h" 367527624eSRobert Watson 377527624eSRobert Watson #ifndef PCBGROUP 387527624eSRobert Watson #error "options RSS depends on options PCBGROUP" 397527624eSRobert Watson #endif 407527624eSRobert Watson 417527624eSRobert Watson #include <sys/param.h> 427527624eSRobert Watson #include <sys/mbuf.h> 437527624eSRobert Watson #include <sys/socket.h> 447527624eSRobert Watson #include <sys/priv.h> 457527624eSRobert Watson #include <sys/kernel.h> 467527624eSRobert Watson #include <sys/smp.h> 477527624eSRobert Watson #include <sys/sysctl.h> 488bde802aSAdrian Chadd #include <sys/sbuf.h> 497527624eSRobert Watson 507527624eSRobert Watson #include <net/if.h> 517527624eSRobert Watson #include <net/if_var.h> 527527624eSRobert Watson #include <net/netisr.h> 537527624eSRobert Watson 547527624eSRobert Watson #include <netinet/in.h> 557527624eSRobert Watson #include <netinet/in_pcb.h> 567527624eSRobert Watson #include <netinet/in_rss.h> 577527624eSRobert Watson #include <netinet/in_var.h> 587527624eSRobert Watson #include <netinet/toeplitz.h> 597527624eSRobert Watson 60*72d33245SAdrian Chadd /* for software rss hash support */ 61*72d33245SAdrian Chadd #include <netinet/ip.h> 62*72d33245SAdrian Chadd #include <netinet/tcp.h> 63*72d33245SAdrian Chadd #include <netinet/udp.h> 64*72d33245SAdrian Chadd 657527624eSRobert Watson /*- 667527624eSRobert Watson * Operating system parts of receiver-side scaling (RSS), which allows 677527624eSRobert Watson * network cards to direct flows to particular receive queues based on hashes 687527624eSRobert Watson * of header tuples. This implementation aligns RSS buckets with connection 697527624eSRobert Watson * groups at the TCP/IP layer, so each bucket is associated with exactly one 707527624eSRobert Watson * group. As a result, the group lookup structures (and lock) should have an 717527624eSRobert Watson * effective affinity with exactly one CPU. 727527624eSRobert Watson * 737527624eSRobert Watson * Network device drivers needing to configure RSS will query this framework 747527624eSRobert Watson * for parameters, such as the current RSS key, hashing policies, number of 757527624eSRobert Watson * bits, and indirection table mapping hashes to buckets and CPUs. They may 767527624eSRobert Watson * provide their own supplementary information, such as queue<->CPU bindings. 777527624eSRobert Watson * It is the responsibility of the network device driver to inject packets 787527624eSRobert Watson * into the stack on as close to the right CPU as possible, if playing by RSS 797527624eSRobert Watson * rules. 807527624eSRobert Watson * 817527624eSRobert Watson * TODO: 827527624eSRobert Watson * 837527624eSRobert Watson * - Synchronization for rss_key and other future-configurable parameters. 847527624eSRobert Watson * - Event handler drivers can register to pick up RSS configuration changes. 857527624eSRobert Watson * - Should we allow rss_basecpu to be configured? 867527624eSRobert Watson * - Randomize key on boot. 877527624eSRobert Watson * - IPv6 support. 887527624eSRobert Watson * - Statistics on how often there's a misalignment between hardware 897527624eSRobert Watson * placement and pcbgroup expectations. 907527624eSRobert Watson */ 917527624eSRobert Watson 927527624eSRobert Watson SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW, 0, "Receive-side steering"); 937527624eSRobert Watson 947527624eSRobert Watson /* 957527624eSRobert Watson * Toeplitz is the only required hash function in the RSS spec, so use it by 967527624eSRobert Watson * default. 977527624eSRobert Watson */ 987527624eSRobert Watson static u_int rss_hashalgo = RSS_HASH_TOEPLITZ; 99af3b2549SHans Petter Selasky SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RDTUN, &rss_hashalgo, 0, 1007527624eSRobert Watson "RSS hash algorithm"); 1017527624eSRobert Watson 1027527624eSRobert Watson /* 1037527624eSRobert Watson * Size of the indirection table; at most 128 entries per the RSS spec. We 1047527624eSRobert Watson * size it to at least 2 times the number of CPUs by default to allow useful 1057527624eSRobert Watson * rebalancing. If not set explicitly with a loader tunable, we tune based 1067527624eSRobert Watson * on the number of CPUs present. 1077527624eSRobert Watson * 1087527624eSRobert Watson * XXXRW: buckets might be better to use for the tunable than bits. 1097527624eSRobert Watson */ 1107527624eSRobert Watson static u_int rss_bits; 111af3b2549SHans Petter Selasky SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RDTUN, &rss_bits, 0, 1127527624eSRobert Watson "RSS bits"); 1137527624eSRobert Watson 1147527624eSRobert Watson static u_int rss_mask; 1157527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0, 1167527624eSRobert Watson "RSS mask"); 1177527624eSRobert Watson 1187527624eSRobert Watson static const u_int rss_maxbits = RSS_MAXBITS; 1197527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD, 1207527624eSRobert Watson __DECONST(int *, &rss_maxbits), 0, "RSS maximum bits"); 1217527624eSRobert Watson 1227527624eSRobert Watson /* 1237527624eSRobert Watson * RSS's own count of the number of CPUs it could be using for processing. 1247527624eSRobert Watson * Bounded to 64 by RSS constants. 1257527624eSRobert Watson */ 1267527624eSRobert Watson static u_int rss_ncpus; 1277527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0, 1287527624eSRobert Watson "Number of CPUs available to RSS"); 1297527624eSRobert Watson 1307527624eSRobert Watson #define RSS_MAXCPUS (1 << (RSS_MAXBITS - 1)) 1317527624eSRobert Watson static const u_int rss_maxcpus = RSS_MAXCPUS; 1327527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD, 1337527624eSRobert Watson __DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used"); 1347527624eSRobert Watson 1357527624eSRobert Watson /* 1367527624eSRobert Watson * Variable exists just for reporting rss_bits in a user-friendly way. 1377527624eSRobert Watson */ 1387527624eSRobert Watson static u_int rss_buckets; 1397527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0, 1407527624eSRobert Watson "RSS buckets"); 1417527624eSRobert Watson 1427527624eSRobert Watson /* 1437527624eSRobert Watson * Base CPU number; devices will add this to all CPU numbers returned by the 1447527624eSRobert Watson * RSS indirection table. Currently unmodifable in FreeBSD. 1457527624eSRobert Watson */ 1467527624eSRobert Watson static const u_int rss_basecpu; 1477527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD, 1487527624eSRobert Watson __DECONST(int *, &rss_basecpu), 0, "RSS base CPU"); 1497527624eSRobert Watson 1507527624eSRobert Watson /* 1517527624eSRobert Watson * RSS secret key, intended to prevent attacks on load-balancing. Its 1527527624eSRobert Watson * effectiveness may be limited by algorithm choice and available entropy 1537527624eSRobert Watson * during the boot. 1547527624eSRobert Watson * 1557527624eSRobert Watson * XXXRW: And that we don't randomize it yet! 1567527624eSRobert Watson * 15785415b47SAdrian Chadd * This is the default Microsoft RSS specification key which is also 15885415b47SAdrian Chadd * the Chelsio T5 firmware default key. 1597527624eSRobert Watson */ 1607527624eSRobert Watson static uint8_t rss_key[RSS_KEYSIZE] = { 16107b4e383SPeter Grehan 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 16207b4e383SPeter Grehan 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 16307b4e383SPeter Grehan 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 16407b4e383SPeter Grehan 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 16507b4e383SPeter Grehan 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, 1667527624eSRobert Watson }; 1677527624eSRobert Watson 1687527624eSRobert Watson /* 1697527624eSRobert Watson * RSS hash->CPU table, which maps hashed packet headers to particular CPUs. 1707527624eSRobert Watson * Drivers may supplement this table with a seperate CPU<->queue table when 1717527624eSRobert Watson * programming devices. 1727527624eSRobert Watson */ 1737527624eSRobert Watson struct rss_table_entry { 1747527624eSRobert Watson uint8_t rte_cpu; /* CPU affinity of bucket. */ 1757527624eSRobert Watson }; 1767527624eSRobert Watson static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN]; 1777527624eSRobert Watson 178*72d33245SAdrian Chadd static inline u_int rss_gethashconfig_local(void); 179*72d33245SAdrian Chadd 1807527624eSRobert Watson static void 1817527624eSRobert Watson rss_init(__unused void *arg) 1827527624eSRobert Watson { 1837527624eSRobert Watson u_int i; 184bad008ceSAdrian Chadd u_int cpuid; 1857527624eSRobert Watson 1867527624eSRobert Watson /* 1877527624eSRobert Watson * Validate tunables, coerce to sensible values. 1887527624eSRobert Watson */ 1897527624eSRobert Watson switch (rss_hashalgo) { 1907527624eSRobert Watson case RSS_HASH_TOEPLITZ: 1917527624eSRobert Watson case RSS_HASH_NAIVE: 1927527624eSRobert Watson break; 1937527624eSRobert Watson 1947527624eSRobert Watson default: 1957527624eSRobert Watson printf("%s: invalid RSS hashalgo %u, coercing to %u", 1967527624eSRobert Watson __func__, rss_hashalgo, RSS_HASH_TOEPLITZ); 1977527624eSRobert Watson rss_hashalgo = RSS_HASH_TOEPLITZ; 1987527624eSRobert Watson } 1997527624eSRobert Watson 2007527624eSRobert Watson /* 2017527624eSRobert Watson * Count available CPUs. 2027527624eSRobert Watson * 2037527624eSRobert Watson * XXXRW: Note incorrect assumptions regarding contiguity of this set 2047527624eSRobert Watson * elsewhere. 2057527624eSRobert Watson */ 2067527624eSRobert Watson rss_ncpus = 0; 2077527624eSRobert Watson for (i = 0; i <= mp_maxid; i++) { 2087527624eSRobert Watson if (CPU_ABSENT(i)) 2097527624eSRobert Watson continue; 2107527624eSRobert Watson rss_ncpus++; 2117527624eSRobert Watson } 2127527624eSRobert Watson if (rss_ncpus > RSS_MAXCPUS) 2137527624eSRobert Watson rss_ncpus = RSS_MAXCPUS; 2147527624eSRobert Watson 2157527624eSRobert Watson /* 2167527624eSRobert Watson * Tune RSS table entries to be no less than 2x the number of CPUs 2177527624eSRobert Watson * -- unless we're running uniprocessor, in which case there's not 2187527624eSRobert Watson * much point in having buckets to rearrange for load-balancing! 2197527624eSRobert Watson */ 2207527624eSRobert Watson if (rss_ncpus > 1) { 2217527624eSRobert Watson if (rss_bits == 0) 2227527624eSRobert Watson rss_bits = fls(rss_ncpus - 1) + 1; 2237527624eSRobert Watson 2247527624eSRobert Watson /* 2257527624eSRobert Watson * Microsoft limits RSS table entries to 128, so apply that 2267527624eSRobert Watson * limit to both auto-detected CPU counts and user-configured 2277527624eSRobert Watson * ones. 2287527624eSRobert Watson */ 2297527624eSRobert Watson if (rss_bits == 0 || rss_bits > RSS_MAXBITS) { 2307527624eSRobert Watson printf("%s: RSS bits %u not valid, coercing to %u", 2317527624eSRobert Watson __func__, rss_bits, RSS_MAXBITS); 2327527624eSRobert Watson rss_bits = RSS_MAXBITS; 2337527624eSRobert Watson } 2347527624eSRobert Watson 2357527624eSRobert Watson /* 2367527624eSRobert Watson * Figure out how many buckets to use; warn if less than the 2377527624eSRobert Watson * number of configured CPUs, although this is not a fatal 2387527624eSRobert Watson * problem. 2397527624eSRobert Watson */ 2407527624eSRobert Watson rss_buckets = (1 << rss_bits); 2417527624eSRobert Watson if (rss_buckets < rss_ncpus) 2427527624eSRobert Watson printf("%s: WARNING: rss_buckets (%u) less than " 2437527624eSRobert Watson "rss_ncpus (%u)\n", __func__, rss_buckets, 2447527624eSRobert Watson rss_ncpus); 2457527624eSRobert Watson rss_mask = rss_buckets - 1; 2467527624eSRobert Watson } else { 2477527624eSRobert Watson rss_bits = 0; 2487527624eSRobert Watson rss_buckets = 1; 2497527624eSRobert Watson rss_mask = 0; 2507527624eSRobert Watson } 2517527624eSRobert Watson 2527527624eSRobert Watson /* 2537527624eSRobert Watson * Set up initial CPU assignments: round-robin by default. 2547527624eSRobert Watson */ 255bad008ceSAdrian Chadd cpuid = CPU_FIRST(); 256bad008ceSAdrian Chadd for (i = 0; i < rss_buckets; i++) { 257bad008ceSAdrian Chadd rss_table[i].rte_cpu = cpuid; 258bad008ceSAdrian Chadd cpuid = CPU_NEXT(cpuid); 259bad008ceSAdrian Chadd } 2607527624eSRobert Watson 2617527624eSRobert Watson /* 2627527624eSRobert Watson * Randomize rrs_key. 2637527624eSRobert Watson * 2647527624eSRobert Watson * XXXRW: Not yet. If nothing else, will require an rss_isbadkey() 2657527624eSRobert Watson * loop to check for "bad" RSS keys. 2667527624eSRobert Watson */ 2677527624eSRobert Watson } 2687527624eSRobert Watson SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL); 2697527624eSRobert Watson 2707527624eSRobert Watson static uint32_t 2717527624eSRobert Watson rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen, 2727527624eSRobert Watson const uint8_t *data) 2737527624eSRobert Watson { 2747527624eSRobert Watson uint32_t v; 2757527624eSRobert Watson u_int i; 2767527624eSRobert Watson 2777527624eSRobert Watson v = 0; 2787527624eSRobert Watson for (i = 0; i < keylen; i++) 2797527624eSRobert Watson v += key[i]; 2807527624eSRobert Watson for (i = 0; i < datalen; i++) 2817527624eSRobert Watson v += data[i]; 2827527624eSRobert Watson return (v); 2837527624eSRobert Watson } 2847527624eSRobert Watson 2857527624eSRobert Watson static uint32_t 2867527624eSRobert Watson rss_hash(u_int datalen, const uint8_t *data) 2877527624eSRobert Watson { 2887527624eSRobert Watson 2897527624eSRobert Watson switch (rss_hashalgo) { 2907527624eSRobert Watson case RSS_HASH_TOEPLITZ: 2917527624eSRobert Watson return (toeplitz_hash(sizeof(rss_key), rss_key, datalen, 2927527624eSRobert Watson data)); 2937527624eSRobert Watson 2947527624eSRobert Watson case RSS_HASH_NAIVE: 2957527624eSRobert Watson return (rss_naive_hash(sizeof(rss_key), rss_key, datalen, 2967527624eSRobert Watson data)); 2977527624eSRobert Watson 2987527624eSRobert Watson default: 2997527624eSRobert Watson panic("%s: unsupported/unknown hashalgo %d", __func__, 3007527624eSRobert Watson rss_hashalgo); 3017527624eSRobert Watson } 3027527624eSRobert Watson } 3037527624eSRobert Watson 3047527624eSRobert Watson /* 3057527624eSRobert Watson * Hash an IPv4 2-tuple. 3067527624eSRobert Watson */ 3077527624eSRobert Watson uint32_t 3087527624eSRobert Watson rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst) 3097527624eSRobert Watson { 3107527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 3117527624eSRobert Watson u_int datalen; 3127527624eSRobert Watson 3137527624eSRobert Watson datalen = 0; 3147527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3157527624eSRobert Watson datalen += sizeof(src); 3167527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3177527624eSRobert Watson datalen += sizeof(dst); 3187527624eSRobert Watson return (rss_hash(datalen, data)); 3197527624eSRobert Watson } 3207527624eSRobert Watson 3217527624eSRobert Watson /* 3227527624eSRobert Watson * Hash an IPv4 4-tuple. 3237527624eSRobert Watson */ 3247527624eSRobert Watson uint32_t 3257527624eSRobert Watson rss_hash_ip4_4tuple(struct in_addr src, u_short srcport, struct in_addr dst, 3267527624eSRobert Watson u_short dstport) 3277527624eSRobert Watson { 3287527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 3297527624eSRobert Watson sizeof(dstport)]; 3307527624eSRobert Watson u_int datalen; 3317527624eSRobert Watson 3327527624eSRobert Watson datalen = 0; 3337527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3347527624eSRobert Watson datalen += sizeof(src); 3357527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3367527624eSRobert Watson datalen += sizeof(dst); 3377527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 3387527624eSRobert Watson datalen += sizeof(srcport); 3397527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 3407527624eSRobert Watson datalen += sizeof(dstport); 3417527624eSRobert Watson return (rss_hash(datalen, data)); 3427527624eSRobert Watson } 3437527624eSRobert Watson 3447527624eSRobert Watson #ifdef INET6 3457527624eSRobert Watson /* 3467527624eSRobert Watson * Hash an IPv6 2-tuple. 3477527624eSRobert Watson */ 3487527624eSRobert Watson uint32_t 3497527624eSRobert Watson rss_hash_ip6_2tuple(struct in6_addr src, struct in6_addr dst) 3507527624eSRobert Watson { 3517527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 3527527624eSRobert Watson u_int datalen; 3537527624eSRobert Watson 3547527624eSRobert Watson datalen = 0; 3557527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3567527624eSRobert Watson datalen += sizeof(src); 3577527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3587527624eSRobert Watson datalen += sizeof(dst); 3597527624eSRobert Watson return (rss_hash(datalen, data)); 3607527624eSRobert Watson } 3617527624eSRobert Watson 3627527624eSRobert Watson /* 3637527624eSRobert Watson * Hash an IPv6 4-tuple. 3647527624eSRobert Watson */ 3657527624eSRobert Watson uint32_t 3667527624eSRobert Watson rss_hash_ip6_4tuple(struct in6_addr src, u_short srcport, 3677527624eSRobert Watson struct in6_addr dst, u_short dstport) 3687527624eSRobert Watson { 3697527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 3707527624eSRobert Watson sizeof(dstport)]; 3717527624eSRobert Watson u_int datalen; 3727527624eSRobert Watson 3737527624eSRobert Watson datalen = 0; 3747527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 3757527624eSRobert Watson datalen += sizeof(src); 3767527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 3777527624eSRobert Watson datalen += sizeof(dst); 3787527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 3797527624eSRobert Watson datalen += sizeof(srcport); 3807527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 3817527624eSRobert Watson datalen += sizeof(dstport); 3827527624eSRobert Watson return (rss_hash(datalen, data)); 3837527624eSRobert Watson } 3847527624eSRobert Watson #endif /* INET6 */ 3857527624eSRobert Watson 3867527624eSRobert Watson /* 3877527624eSRobert Watson * Query the number of RSS bits in use. 3887527624eSRobert Watson */ 3897527624eSRobert Watson u_int 3907527624eSRobert Watson rss_getbits(void) 3917527624eSRobert Watson { 3927527624eSRobert Watson 3937527624eSRobert Watson return (rss_bits); 3947527624eSRobert Watson } 3957527624eSRobert Watson 3967527624eSRobert Watson /* 3977527624eSRobert Watson * Query the RSS bucket associated with an RSS hash. 3987527624eSRobert Watson */ 3997527624eSRobert Watson u_int 4007527624eSRobert Watson rss_getbucket(u_int hash) 4017527624eSRobert Watson { 4027527624eSRobert Watson 4037527624eSRobert Watson return (hash & rss_mask); 4047527624eSRobert Watson } 4057527624eSRobert Watson 4067527624eSRobert Watson /* 407a6c88ec4SAdrian Chadd * Query the RSS layer bucket associated with the given 408a6c88ec4SAdrian Chadd * entry in the RSS hash space. 409a6c88ec4SAdrian Chadd * 410a6c88ec4SAdrian Chadd * The RSS indirection table is 0 .. rss_buckets-1, 411a6c88ec4SAdrian Chadd * covering the low 'rss_bits' of the total 128 slot 412a6c88ec4SAdrian Chadd * RSS indirection table. So just mask off rss_bits and 413a6c88ec4SAdrian Chadd * return that. 414a6c88ec4SAdrian Chadd * 415a6c88ec4SAdrian Chadd * NIC drivers can then iterate over the 128 slot RSS 416a6c88ec4SAdrian Chadd * indirection table and fetch which RSS bucket to 417a6c88ec4SAdrian Chadd * map it to. This will typically be a CPU queue 418a6c88ec4SAdrian Chadd */ 419a6c88ec4SAdrian Chadd u_int 420a6c88ec4SAdrian Chadd rss_get_indirection_to_bucket(u_int index) 421a6c88ec4SAdrian Chadd { 422a6c88ec4SAdrian Chadd 423a6c88ec4SAdrian Chadd return (index & rss_mask); 424a6c88ec4SAdrian Chadd } 425a6c88ec4SAdrian Chadd 426a6c88ec4SAdrian Chadd /* 4277527624eSRobert Watson * Query the RSS CPU associated with an RSS bucket. 4287527624eSRobert Watson */ 4297527624eSRobert Watson u_int 4307527624eSRobert Watson rss_getcpu(u_int bucket) 4317527624eSRobert Watson { 4327527624eSRobert Watson 4337527624eSRobert Watson return (rss_table[bucket].rte_cpu); 4347527624eSRobert Watson } 4357527624eSRobert Watson 4367527624eSRobert Watson /* 437cc6c1877SAdrian Chadd * netisr CPU affinity lookup given just the hash and hashtype. 438cc6c1877SAdrian Chadd */ 439cc6c1877SAdrian Chadd u_int 440cc6c1877SAdrian Chadd rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type) 441cc6c1877SAdrian Chadd { 442cc6c1877SAdrian Chadd 443cc6c1877SAdrian Chadd switch (hash_type) { 444cc6c1877SAdrian Chadd case M_HASHTYPE_RSS_IPV4: 445cc6c1877SAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV4: 4469870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV4: 447e989b65fSAdrian Chadd case M_HASHTYPE_RSS_IPV6: 448e989b65fSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV6: 4499870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV6: 450cc6c1877SAdrian Chadd return (rss_getcpu(rss_getbucket(hash_val))); 451cc6c1877SAdrian Chadd default: 452cc6c1877SAdrian Chadd return (NETISR_CPUID_NONE); 453cc6c1877SAdrian Chadd } 454cc6c1877SAdrian Chadd } 455cc6c1877SAdrian Chadd 456cc6c1877SAdrian Chadd /* 4578bde802aSAdrian Chadd * Query the RSS bucket associated with the given hash value and 4588bde802aSAdrian Chadd * type. 4598bde802aSAdrian Chadd */ 4608bde802aSAdrian Chadd int 4618bde802aSAdrian Chadd rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, uint32_t *bucket_id) 4628bde802aSAdrian Chadd { 4638bde802aSAdrian Chadd 4648bde802aSAdrian Chadd switch (hash_type) { 4658bde802aSAdrian Chadd case M_HASHTYPE_RSS_IPV4: 4668bde802aSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV4: 4679870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV4: 468e989b65fSAdrian Chadd case M_HASHTYPE_RSS_IPV6: 469e989b65fSAdrian Chadd case M_HASHTYPE_RSS_TCP_IPV6: 4709870806cSAdrian Chadd case M_HASHTYPE_RSS_UDP_IPV6: 4718bde802aSAdrian Chadd *bucket_id = rss_getbucket(hash_val); 4728bde802aSAdrian Chadd return (0); 4738bde802aSAdrian Chadd default: 4748bde802aSAdrian Chadd return (-1); 4758bde802aSAdrian Chadd } 4768bde802aSAdrian Chadd } 4778bde802aSAdrian Chadd 4788bde802aSAdrian Chadd /* 4797527624eSRobert Watson * netisr CPU affinity lookup routine for use by protocols. 4807527624eSRobert Watson */ 4817527624eSRobert Watson struct mbuf * 4827527624eSRobert Watson rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 4837527624eSRobert Watson { 4847527624eSRobert Watson 4857527624eSRobert Watson M_ASSERTPKTHDR(m); 486cc6c1877SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 4877527624eSRobert Watson return (m); 4887527624eSRobert Watson } 4897527624eSRobert Watson 4908bde802aSAdrian Chadd int 4918bde802aSAdrian Chadd rss_m2bucket(struct mbuf *m, uint32_t *bucket_id) 4928bde802aSAdrian Chadd { 4938bde802aSAdrian Chadd 4948bde802aSAdrian Chadd M_ASSERTPKTHDR(m); 4958bde802aSAdrian Chadd 4968bde802aSAdrian Chadd return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 4978bde802aSAdrian Chadd bucket_id)); 4988bde802aSAdrian Chadd } 4998bde802aSAdrian Chadd 5007527624eSRobert Watson /* 501*72d33245SAdrian Chadd * Calculate an appropriate ipv4 2-tuple or 4-tuple given the given 502*72d33245SAdrian Chadd * IPv4 source/destination address, UDP or TCP source/destination ports 503*72d33245SAdrian Chadd * and the protocol type. 504*72d33245SAdrian Chadd * 505*72d33245SAdrian Chadd * The protocol code may wish to do a software hash of the given 506*72d33245SAdrian Chadd * tuple. This depends upon the currently configured RSS hash types. 507*72d33245SAdrian Chadd * 508*72d33245SAdrian Chadd * This assumes that the packet in question isn't a fragment. 509*72d33245SAdrian Chadd * 510*72d33245SAdrian Chadd * It also assumes the packet source/destination address 511*72d33245SAdrian Chadd * are in "incoming" packet order (ie, source is "far" address.) 512*72d33245SAdrian Chadd */ 513*72d33245SAdrian Chadd int 514*72d33245SAdrian Chadd rss_proto_software_hash_v4(struct in_addr s, struct in_addr d, 515*72d33245SAdrian Chadd u_short sp, u_short dp, int proto, 516*72d33245SAdrian Chadd uint32_t *hashval, uint32_t *hashtype) 517*72d33245SAdrian Chadd { 518*72d33245SAdrian Chadd uint32_t hash; 519*72d33245SAdrian Chadd 520*72d33245SAdrian Chadd /* 521*72d33245SAdrian Chadd * Next, choose the hash type depending upon the protocol 522*72d33245SAdrian Chadd * identifier. 523*72d33245SAdrian Chadd */ 524*72d33245SAdrian Chadd if ((proto == IPPROTO_TCP) && 525*72d33245SAdrian Chadd (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4)) { 526*72d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 527*72d33245SAdrian Chadd *hashval = hash; 528*72d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_TCP_IPV4; 529*72d33245SAdrian Chadd return (0); 530*72d33245SAdrian Chadd } else if ((proto == IPPROTO_UDP) && 531*72d33245SAdrian Chadd (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4)) { 532*72d33245SAdrian Chadd hash = rss_hash_ip4_4tuple(s, sp, d, dp); 533*72d33245SAdrian Chadd *hashval = hash; 534*72d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_UDP_IPV4; 535*72d33245SAdrian Chadd return (0); 536*72d33245SAdrian Chadd } else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) { 537*72d33245SAdrian Chadd /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ 538*72d33245SAdrian Chadd hash = rss_hash_ip4_2tuple(s, d); 539*72d33245SAdrian Chadd *hashval = hash; 540*72d33245SAdrian Chadd *hashtype = M_HASHTYPE_RSS_IPV4; 541*72d33245SAdrian Chadd return (0); 542*72d33245SAdrian Chadd } 543*72d33245SAdrian Chadd 544*72d33245SAdrian Chadd /* No configured available hashtypes! */ 545*72d33245SAdrian Chadd printf("%s: no available hashtypes!\n", __func__); 546*72d33245SAdrian Chadd return (-1); 547*72d33245SAdrian Chadd } 548*72d33245SAdrian Chadd 549*72d33245SAdrian Chadd /* 550*72d33245SAdrian Chadd * Do a software calculation of the RSS for the given mbuf. 551*72d33245SAdrian Chadd * 552*72d33245SAdrian Chadd * This is typically used by the input path to recalculate the RSS after 553*72d33245SAdrian Chadd * some form of packet processing (eg de-capsulation, IP fragment reassembly.) 554*72d33245SAdrian Chadd * 555*72d33245SAdrian Chadd * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and 556*72d33245SAdrian Chadd * RSS_HASH_PKT_EGRESS for outgoing. 557*72d33245SAdrian Chadd * 558*72d33245SAdrian Chadd * Returns 0 if a hash was done, -1 if no hash was done, +1 if 559*72d33245SAdrian Chadd * the mbuf already had a valid RSS flowid. 560*72d33245SAdrian Chadd * 561*72d33245SAdrian Chadd * This function doesn't modify the mbuf. It's up to the caller to 562*72d33245SAdrian Chadd * assign flowid/flowtype as appropriate. 563*72d33245SAdrian Chadd */ 564*72d33245SAdrian Chadd int 565*72d33245SAdrian Chadd rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval, 566*72d33245SAdrian Chadd uint32_t *hashtype) 567*72d33245SAdrian Chadd { 568*72d33245SAdrian Chadd const struct ip *ip; 569*72d33245SAdrian Chadd const struct tcphdr *th; 570*72d33245SAdrian Chadd const struct udphdr *uh; 571*72d33245SAdrian Chadd uint8_t proto; 572*72d33245SAdrian Chadd int iphlen; 573*72d33245SAdrian Chadd int is_frag = 0; 574*72d33245SAdrian Chadd 575*72d33245SAdrian Chadd /* 576*72d33245SAdrian Chadd * XXX For now this only handles hashing on incoming mbufs. 577*72d33245SAdrian Chadd */ 578*72d33245SAdrian Chadd if (dir != RSS_HASH_PKT_INGRESS) { 579*72d33245SAdrian Chadd printf("%s: called on EGRESS packet!\n", __func__); 580*72d33245SAdrian Chadd return (-1); 581*72d33245SAdrian Chadd } 582*72d33245SAdrian Chadd 583*72d33245SAdrian Chadd /* 584*72d33245SAdrian Chadd * First, validate that the mbuf we have is long enough 585*72d33245SAdrian Chadd * to have an IPv4 header in it. 586*72d33245SAdrian Chadd */ 587*72d33245SAdrian Chadd if (m->m_pkthdr.len < (sizeof(struct ip))) { 588*72d33245SAdrian Chadd printf("%s: short mbuf pkthdr\n", __func__); 589*72d33245SAdrian Chadd return (-1); 590*72d33245SAdrian Chadd } 591*72d33245SAdrian Chadd if (m->m_len < (sizeof(struct ip))) { 592*72d33245SAdrian Chadd printf("%s: short mbuf len\n", __func__); 593*72d33245SAdrian Chadd return (-1); 594*72d33245SAdrian Chadd } 595*72d33245SAdrian Chadd 596*72d33245SAdrian Chadd /* Ok, let's dereference that */ 597*72d33245SAdrian Chadd ip = mtod(m, struct ip *); 598*72d33245SAdrian Chadd proto = ip->ip_p; 599*72d33245SAdrian Chadd iphlen = ip->ip_hl << 2; 600*72d33245SAdrian Chadd 601*72d33245SAdrian Chadd /* 602*72d33245SAdrian Chadd * If this is a fragment then it shouldn't be four-tuple 603*72d33245SAdrian Chadd * hashed just yet. Once it's reassembled into a full 604*72d33245SAdrian Chadd * frame it should be re-hashed. 605*72d33245SAdrian Chadd */ 606*72d33245SAdrian Chadd if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) 607*72d33245SAdrian Chadd is_frag = 1; 608*72d33245SAdrian Chadd 609*72d33245SAdrian Chadd /* 610*72d33245SAdrian Chadd * If the mbuf flowid/flowtype matches the packet type, 611*72d33245SAdrian Chadd * and we don't support the 4-tuple version of the given protocol, 612*72d33245SAdrian Chadd * then signal to the owner that it can trust the flowid/flowtype 613*72d33245SAdrian Chadd * details. 614*72d33245SAdrian Chadd * 615*72d33245SAdrian Chadd * This is a little picky - eg, if TCPv4 / UDPv4 hashing 616*72d33245SAdrian Chadd * is supported but we got a TCP/UDP frame only 2-tuple hashed, 617*72d33245SAdrian Chadd * then we shouldn't just "trust" the 2-tuple hash. We need 618*72d33245SAdrian Chadd * a 4-tuple hash. 619*72d33245SAdrian Chadd */ 620*72d33245SAdrian Chadd if (m->m_flags & M_FLOWID) { 621*72d33245SAdrian Chadd uint32_t flowid, flowtype; 622*72d33245SAdrian Chadd 623*72d33245SAdrian Chadd flowid = m->m_pkthdr.flowid; 624*72d33245SAdrian Chadd flowtype = M_HASHTYPE_GET(m); 625*72d33245SAdrian Chadd 626*72d33245SAdrian Chadd switch (proto) { 627*72d33245SAdrian Chadd case IPPROTO_UDP: 628*72d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) && 629*72d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_UDP_IPV4) && 630*72d33245SAdrian Chadd (is_frag == 0)) { 631*72d33245SAdrian Chadd return (1); 632*72d33245SAdrian Chadd } 633*72d33245SAdrian Chadd /* 634*72d33245SAdrian Chadd * Only allow 2-tuple for UDP frames if we don't also 635*72d33245SAdrian Chadd * support 4-tuple for UDP. 636*72d33245SAdrian Chadd */ 637*72d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 638*72d33245SAdrian Chadd ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) == 0) && 639*72d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 640*72d33245SAdrian Chadd return (1); 641*72d33245SAdrian Chadd } 642*72d33245SAdrian Chadd break; 643*72d33245SAdrian Chadd case IPPROTO_TCP: 644*72d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) && 645*72d33245SAdrian Chadd (flowtype == M_HASHTYPE_RSS_TCP_IPV4) && 646*72d33245SAdrian Chadd (is_frag == 0)) { 647*72d33245SAdrian Chadd return (1); 648*72d33245SAdrian Chadd } 649*72d33245SAdrian Chadd /* 650*72d33245SAdrian Chadd * Only allow 2-tuple for TCP frames if we don't also 651*72d33245SAdrian Chadd * support 2-tuple for TCP. 652*72d33245SAdrian Chadd */ 653*72d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 654*72d33245SAdrian Chadd ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) == 0) && 655*72d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 656*72d33245SAdrian Chadd return (1); 657*72d33245SAdrian Chadd } 658*72d33245SAdrian Chadd break; 659*72d33245SAdrian Chadd default: 660*72d33245SAdrian Chadd if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && 661*72d33245SAdrian Chadd flowtype == M_HASHTYPE_RSS_IPV4) { 662*72d33245SAdrian Chadd return (1); 663*72d33245SAdrian Chadd } 664*72d33245SAdrian Chadd break; 665*72d33245SAdrian Chadd } 666*72d33245SAdrian Chadd } 667*72d33245SAdrian Chadd 668*72d33245SAdrian Chadd /* 669*72d33245SAdrian Chadd * Decode enough information to make a hash decision. 670*72d33245SAdrian Chadd * 671*72d33245SAdrian Chadd * XXX TODO: does the hardware hash on 4-tuple if IP 672*72d33245SAdrian Chadd * options are present? 673*72d33245SAdrian Chadd */ 674*72d33245SAdrian Chadd if (proto == IPPROTO_TCP && is_frag == 0) { 675*72d33245SAdrian Chadd if (m->m_len < iphlen + sizeof(struct tcphdr)) { 676*72d33245SAdrian Chadd printf("%s: short TCP frame?\n", __func__); 677*72d33245SAdrian Chadd return (-1); 678*72d33245SAdrian Chadd } 679*72d33245SAdrian Chadd th = (struct tcphdr *)((caddr_t)ip + iphlen); 680*72d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 681*72d33245SAdrian Chadd th->th_sport, 682*72d33245SAdrian Chadd th->th_dport, 683*72d33245SAdrian Chadd proto, 684*72d33245SAdrian Chadd hashval, 685*72d33245SAdrian Chadd hashtype); 686*72d33245SAdrian Chadd } else if (proto == IPPROTO_UDP && is_frag == 0) { 687*72d33245SAdrian Chadd uh = (struct udphdr *)((caddr_t)ip + iphlen); 688*72d33245SAdrian Chadd if (m->m_len < iphlen + sizeof(struct udphdr)) { 689*72d33245SAdrian Chadd printf("%s: short UDP frame?\n", __func__); 690*72d33245SAdrian Chadd return (-1); 691*72d33245SAdrian Chadd } 692*72d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 693*72d33245SAdrian Chadd uh->uh_sport, 694*72d33245SAdrian Chadd uh->uh_dport, 695*72d33245SAdrian Chadd proto, 696*72d33245SAdrian Chadd hashval, 697*72d33245SAdrian Chadd hashtype); 698*72d33245SAdrian Chadd } else { 699*72d33245SAdrian Chadd /* Default to 2-tuple hash */ 700*72d33245SAdrian Chadd return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst, 701*72d33245SAdrian Chadd 0, /* source port */ 702*72d33245SAdrian Chadd 0, /* destination port */ 703*72d33245SAdrian Chadd 0, /* IPPROTO_IP */ 704*72d33245SAdrian Chadd hashval, 705*72d33245SAdrian Chadd hashtype); 706*72d33245SAdrian Chadd } 707*72d33245SAdrian Chadd } 708*72d33245SAdrian Chadd 709*72d33245SAdrian Chadd /* 710*72d33245SAdrian Chadd * Similar to rss_m2cpuid, but designed to be used by the IP NETISR 711*72d33245SAdrian Chadd * on incoming frames. 712*72d33245SAdrian Chadd * 713*72d33245SAdrian Chadd * If an existing RSS hash exists and it matches what the configured 714*72d33245SAdrian Chadd * hashing is, then use it. 715*72d33245SAdrian Chadd * 716*72d33245SAdrian Chadd * If there's an existing RSS hash but the desired hash is different, 717*72d33245SAdrian Chadd * or if there's no useful RSS hash, then calculate it via 718*72d33245SAdrian Chadd * the software path. 719*72d33245SAdrian Chadd * 720*72d33245SAdrian Chadd * XXX TODO: definitely want statistics here! 721*72d33245SAdrian Chadd */ 722*72d33245SAdrian Chadd struct mbuf * 723*72d33245SAdrian Chadd rss_soft_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 724*72d33245SAdrian Chadd { 725*72d33245SAdrian Chadd uint32_t hash_val, hash_type; 726*72d33245SAdrian Chadd int ret; 727*72d33245SAdrian Chadd 728*72d33245SAdrian Chadd M_ASSERTPKTHDR(m); 729*72d33245SAdrian Chadd 730*72d33245SAdrian Chadd ret = rss_mbuf_software_hash_v4(m, RSS_HASH_PKT_INGRESS, 731*72d33245SAdrian Chadd &hash_val, &hash_type); 732*72d33245SAdrian Chadd if (ret > 0) { 733*72d33245SAdrian Chadd /* mbuf has a valid hash already; don't need to modify it */ 734*72d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 735*72d33245SAdrian Chadd } else if (ret == 0) { 736*72d33245SAdrian Chadd /* hash was done; update */ 737*72d33245SAdrian Chadd m->m_pkthdr.flowid = hash_val; 738*72d33245SAdrian Chadd M_HASHTYPE_SET(m, hash_type); 739*72d33245SAdrian Chadd m->m_flags |= M_FLOWID; 740*72d33245SAdrian Chadd *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); 741*72d33245SAdrian Chadd } else { /* ret < 0 */ 742*72d33245SAdrian Chadd /* no hash was done */ 743*72d33245SAdrian Chadd *cpuid = NETISR_CPUID_NONE; 744*72d33245SAdrian Chadd } 745*72d33245SAdrian Chadd return (m); 746*72d33245SAdrian Chadd } 747*72d33245SAdrian Chadd 748*72d33245SAdrian Chadd /* 7497527624eSRobert Watson * Query the RSS hash algorithm. 7507527624eSRobert Watson */ 7517527624eSRobert Watson u_int 7527527624eSRobert Watson rss_gethashalgo(void) 7537527624eSRobert Watson { 7547527624eSRobert Watson 7557527624eSRobert Watson return (rss_hashalgo); 7567527624eSRobert Watson } 7577527624eSRobert Watson 7587527624eSRobert Watson /* 7597527624eSRobert Watson * Query the current RSS key; likely to be used by device drivers when 7607527624eSRobert Watson * configuring hardware RSS. Caller must pass an array of size RSS_KEYSIZE. 7617527624eSRobert Watson * 7627527624eSRobert Watson * XXXRW: Perhaps we should do the accept-a-length-and-truncate thing? 7637527624eSRobert Watson */ 7647527624eSRobert Watson void 7657527624eSRobert Watson rss_getkey(uint8_t *key) 7667527624eSRobert Watson { 7677527624eSRobert Watson 7687527624eSRobert Watson bcopy(rss_key, key, sizeof(rss_key)); 7697527624eSRobert Watson } 7707527624eSRobert Watson 7717527624eSRobert Watson /* 7727527624eSRobert Watson * Query the number of buckets; this may be used by both network device 7737527624eSRobert Watson * drivers, which will need to populate hardware shadows of the software 7747527624eSRobert Watson * indirection table, and the network stack itself (such as when deciding how 7757527624eSRobert Watson * many connection groups to allocate). 7767527624eSRobert Watson */ 7777527624eSRobert Watson u_int 7787527624eSRobert Watson rss_getnumbuckets(void) 7797527624eSRobert Watson { 7807527624eSRobert Watson 7817527624eSRobert Watson return (rss_buckets); 7827527624eSRobert Watson } 7837527624eSRobert Watson 7847527624eSRobert Watson /* 7857527624eSRobert Watson * Query the number of CPUs in use by RSS; may be useful to device drivers 7867527624eSRobert Watson * trying to figure out how to map a larger number of CPUs into a smaller 7877527624eSRobert Watson * number of receive queues. 7887527624eSRobert Watson */ 7897527624eSRobert Watson u_int 7907527624eSRobert Watson rss_getnumcpus(void) 7917527624eSRobert Watson { 7927527624eSRobert Watson 7937527624eSRobert Watson return (rss_ncpus); 7947527624eSRobert Watson } 7957527624eSRobert Watson 796*72d33245SAdrian Chadd static inline u_int 797*72d33245SAdrian Chadd rss_gethashconfig_local(void) 79840c753e3SAdrian Chadd { 799*72d33245SAdrian Chadd 80040c753e3SAdrian Chadd /* Return 4-tuple for TCP; 2-tuple for others */ 80140c753e3SAdrian Chadd /* 80240c753e3SAdrian Chadd * UDP may fragment more often than TCP and thus we'll end up with 80340c753e3SAdrian Chadd * NICs returning 2-tuple fragments. 80440c753e3SAdrian Chadd * udp_init() and udplite_init() both currently initialise things 80540c753e3SAdrian Chadd * as 2-tuple. 80640c753e3SAdrian Chadd * So for now disable UDP 4-tuple hashing until all of the other 80740c753e3SAdrian Chadd * pieces are in place. 80840c753e3SAdrian Chadd */ 80940c753e3SAdrian Chadd return ( 81040c753e3SAdrian Chadd RSS_HASHTYPE_RSS_IPV4 81140c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV4 81240c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_IPV6 81340c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV6 81440c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_IPV6_EX 81540c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_TCP_IPV6_EX 81640c753e3SAdrian Chadd #if 0 81740c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV4 81840c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV4_EX 81940c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV6 82040c753e3SAdrian Chadd | RSS_HASHTYPE_RSS_UDP_IPV6_EX 82140c753e3SAdrian Chadd #endif 82240c753e3SAdrian Chadd ); 82340c753e3SAdrian Chadd } 82440c753e3SAdrian Chadd 82540c753e3SAdrian Chadd /* 826*72d33245SAdrian Chadd * Return the supported RSS hash configuration. 827*72d33245SAdrian Chadd * 828*72d33245SAdrian Chadd * NICs should query this to determine what to configure in their redirection 829*72d33245SAdrian Chadd * matching table. 830*72d33245SAdrian Chadd */ 831*72d33245SAdrian Chadd u_int 832*72d33245SAdrian Chadd rss_gethashconfig(void) 833*72d33245SAdrian Chadd { 834*72d33245SAdrian Chadd 835*72d33245SAdrian Chadd return (rss_gethashconfig_local()); 836*72d33245SAdrian Chadd } 837*72d33245SAdrian Chadd 838*72d33245SAdrian Chadd /* 8397527624eSRobert Watson * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want 8407527624eSRobert Watson * it appearing in debugging output unnecessarily. 8417527624eSRobert Watson */ 8427527624eSRobert Watson static int 8437527624eSRobert Watson sysctl_rss_key(SYSCTL_HANDLER_ARGS) 8447527624eSRobert Watson { 8457527624eSRobert Watson uint8_t temp_rss_key[RSS_KEYSIZE]; 8467527624eSRobert Watson int error; 8477527624eSRobert Watson 8487527624eSRobert Watson error = priv_check(req->td, PRIV_NETINET_HASHKEY); 8497527624eSRobert Watson if (error) 8507527624eSRobert Watson return (error); 8517527624eSRobert Watson 8527527624eSRobert Watson bcopy(rss_key, temp_rss_key, sizeof(temp_rss_key)); 8537527624eSRobert Watson error = sysctl_handle_opaque(oidp, temp_rss_key, 8547527624eSRobert Watson sizeof(temp_rss_key), req); 8557527624eSRobert Watson if (error) 8567527624eSRobert Watson return (error); 8577527624eSRobert Watson if (req->newptr != NULL) { 8587527624eSRobert Watson /* XXXRW: Not yet. */ 8597527624eSRobert Watson return (EINVAL); 8607527624eSRobert Watson } 8617527624eSRobert Watson return (0); 8627527624eSRobert Watson } 8637527624eSRobert Watson SYSCTL_PROC(_net_inet_rss, OID_AUTO, key, 8647527624eSRobert Watson CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key, 8657527624eSRobert Watson "", "RSS keying material"); 8668bde802aSAdrian Chadd 8678bde802aSAdrian Chadd static int 8688bde802aSAdrian Chadd sysctl_rss_bucket_mapping(SYSCTL_HANDLER_ARGS) 8698bde802aSAdrian Chadd { 8708bde802aSAdrian Chadd struct sbuf *sb; 8718bde802aSAdrian Chadd int error; 8728bde802aSAdrian Chadd int i; 8738bde802aSAdrian Chadd 8748bde802aSAdrian Chadd error = 0; 8758bde802aSAdrian Chadd error = sysctl_wire_old_buffer(req, 0); 8768bde802aSAdrian Chadd if (error != 0) 8778bde802aSAdrian Chadd return (error); 8788bde802aSAdrian Chadd sb = sbuf_new_for_sysctl(NULL, NULL, 512, req); 8798bde802aSAdrian Chadd if (sb == NULL) 8808bde802aSAdrian Chadd return (ENOMEM); 8818bde802aSAdrian Chadd for (i = 0; i < rss_buckets; i++) { 8828bde802aSAdrian Chadd sbuf_printf(sb, "%s%d:%d", i == 0 ? "" : " ", 8838bde802aSAdrian Chadd i, 8848bde802aSAdrian Chadd rss_getcpu(i)); 8858bde802aSAdrian Chadd } 8868bde802aSAdrian Chadd error = sbuf_finish(sb); 8878bde802aSAdrian Chadd sbuf_delete(sb); 8888bde802aSAdrian Chadd 8898bde802aSAdrian Chadd return (error); 8908bde802aSAdrian Chadd } 8918bde802aSAdrian Chadd SYSCTL_PROC(_net_inet_rss, OID_AUTO, bucket_mapping, 8928bde802aSAdrian Chadd CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 8938bde802aSAdrian Chadd sysctl_rss_bucket_mapping, "", "RSS bucket -> CPU mapping"); 894