1*7527624eSRobert Watson /*- 2*7527624eSRobert Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. 3*7527624eSRobert Watson * All rights reserved. 4*7527624eSRobert Watson * 5*7527624eSRobert Watson * This software was developed by Robert N. M. Watson under contract 6*7527624eSRobert Watson * to Juniper Networks, Inc. 7*7527624eSRobert Watson * 8*7527624eSRobert Watson * Redistribution and use in source and binary forms, with or without 9*7527624eSRobert Watson * modification, are permitted provided that the following conditions 10*7527624eSRobert Watson * are met: 11*7527624eSRobert Watson * 1. Redistributions of source code must retain the above copyright 12*7527624eSRobert Watson * notice, this list of conditions and the following disclaimer. 13*7527624eSRobert Watson * 2. Redistributions in binary form must reproduce the above copyright 14*7527624eSRobert Watson * notice, this list of conditions and the following disclaimer in the 15*7527624eSRobert Watson * documentation and/or other materials provided with the distribution. 16*7527624eSRobert Watson * 17*7527624eSRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18*7527624eSRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19*7527624eSRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20*7527624eSRobert Watson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21*7527624eSRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22*7527624eSRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23*7527624eSRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24*7527624eSRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25*7527624eSRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26*7527624eSRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27*7527624eSRobert Watson * SUCH DAMAGE. 28*7527624eSRobert Watson */ 29*7527624eSRobert Watson 30*7527624eSRobert Watson #include <sys/cdefs.h> 31*7527624eSRobert Watson 32*7527624eSRobert Watson __FBSDID("$FreeBSD$"); 33*7527624eSRobert Watson 34*7527624eSRobert Watson #include "opt_inet6.h" 35*7527624eSRobert Watson #include "opt_pcbgroup.h" 36*7527624eSRobert Watson 37*7527624eSRobert Watson #ifndef PCBGROUP 38*7527624eSRobert Watson #error "options RSS depends on options PCBGROUP" 39*7527624eSRobert Watson #endif 40*7527624eSRobert Watson 41*7527624eSRobert Watson #include <sys/param.h> 42*7527624eSRobert Watson #include <sys/mbuf.h> 43*7527624eSRobert Watson #include <sys/socket.h> 44*7527624eSRobert Watson #include <sys/priv.h> 45*7527624eSRobert Watson #include <sys/kernel.h> 46*7527624eSRobert Watson #include <sys/smp.h> 47*7527624eSRobert Watson #include <sys/sysctl.h> 48*7527624eSRobert Watson 49*7527624eSRobert Watson #include <net/if.h> 50*7527624eSRobert Watson #include <net/if_var.h> 51*7527624eSRobert Watson #include <net/netisr.h> 52*7527624eSRobert Watson 53*7527624eSRobert Watson #include <netinet/in.h> 54*7527624eSRobert Watson #include <netinet/in_pcb.h> 55*7527624eSRobert Watson #include <netinet/in_rss.h> 56*7527624eSRobert Watson #include <netinet/in_var.h> 57*7527624eSRobert Watson #include <netinet/toeplitz.h> 58*7527624eSRobert Watson 59*7527624eSRobert Watson /*- 60*7527624eSRobert Watson * Operating system parts of receiver-side scaling (RSS), which allows 61*7527624eSRobert Watson * network cards to direct flows to particular receive queues based on hashes 62*7527624eSRobert Watson * of header tuples. This implementation aligns RSS buckets with connection 63*7527624eSRobert Watson * groups at the TCP/IP layer, so each bucket is associated with exactly one 64*7527624eSRobert Watson * group. As a result, the group lookup structures (and lock) should have an 65*7527624eSRobert Watson * effective affinity with exactly one CPU. 66*7527624eSRobert Watson * 67*7527624eSRobert Watson * Network device drivers needing to configure RSS will query this framework 68*7527624eSRobert Watson * for parameters, such as the current RSS key, hashing policies, number of 69*7527624eSRobert Watson * bits, and indirection table mapping hashes to buckets and CPUs. They may 70*7527624eSRobert Watson * provide their own supplementary information, such as queue<->CPU bindings. 71*7527624eSRobert Watson * It is the responsibility of the network device driver to inject packets 72*7527624eSRobert Watson * into the stack on as close to the right CPU as possible, if playing by RSS 73*7527624eSRobert Watson * rules. 74*7527624eSRobert Watson * 75*7527624eSRobert Watson * TODO: 76*7527624eSRobert Watson * 77*7527624eSRobert Watson * - Synchronization for rss_key and other future-configurable parameters. 78*7527624eSRobert Watson * - Event handler drivers can register to pick up RSS configuration changes. 79*7527624eSRobert Watson * - Should we allow rss_basecpu to be configured? 80*7527624eSRobert Watson * - Randomize key on boot. 81*7527624eSRobert Watson * - IPv6 support. 82*7527624eSRobert Watson * - Statistics on how often there's a misalignment between hardware 83*7527624eSRobert Watson * placement and pcbgroup expectations. 84*7527624eSRobert Watson */ 85*7527624eSRobert Watson 86*7527624eSRobert Watson SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW, 0, "Receive-side steering"); 87*7527624eSRobert Watson 88*7527624eSRobert Watson /* 89*7527624eSRobert Watson * Toeplitz is the only required hash function in the RSS spec, so use it by 90*7527624eSRobert Watson * default. 91*7527624eSRobert Watson */ 92*7527624eSRobert Watson static u_int rss_hashalgo = RSS_HASH_TOEPLITZ; 93*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RD, &rss_hashalgo, 0, 94*7527624eSRobert Watson "RSS hash algorithm"); 95*7527624eSRobert Watson TUNABLE_INT("net.inet.rss.hashalgo", &rss_hashalgo); 96*7527624eSRobert Watson 97*7527624eSRobert Watson /* 98*7527624eSRobert Watson * Size of the indirection table; at most 128 entries per the RSS spec. We 99*7527624eSRobert Watson * size it to at least 2 times the number of CPUs by default to allow useful 100*7527624eSRobert Watson * rebalancing. If not set explicitly with a loader tunable, we tune based 101*7527624eSRobert Watson * on the number of CPUs present. 102*7527624eSRobert Watson * 103*7527624eSRobert Watson * XXXRW: buckets might be better to use for the tunable than bits. 104*7527624eSRobert Watson */ 105*7527624eSRobert Watson static u_int rss_bits; 106*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RD, &rss_bits, 0, 107*7527624eSRobert Watson "RSS bits"); 108*7527624eSRobert Watson TUNABLE_INT("net.inet.rss.bits", &rss_bits); 109*7527624eSRobert Watson 110*7527624eSRobert Watson static u_int rss_mask; 111*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0, 112*7527624eSRobert Watson "RSS mask"); 113*7527624eSRobert Watson 114*7527624eSRobert Watson static const u_int rss_maxbits = RSS_MAXBITS; 115*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD, 116*7527624eSRobert Watson __DECONST(int *, &rss_maxbits), 0, "RSS maximum bits"); 117*7527624eSRobert Watson 118*7527624eSRobert Watson /* 119*7527624eSRobert Watson * RSS's own count of the number of CPUs it could be using for processing. 120*7527624eSRobert Watson * Bounded to 64 by RSS constants. 121*7527624eSRobert Watson */ 122*7527624eSRobert Watson static u_int rss_ncpus; 123*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0, 124*7527624eSRobert Watson "Number of CPUs available to RSS"); 125*7527624eSRobert Watson 126*7527624eSRobert Watson #define RSS_MAXCPUS (1 << (RSS_MAXBITS - 1)) 127*7527624eSRobert Watson static const u_int rss_maxcpus = RSS_MAXCPUS; 128*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD, 129*7527624eSRobert Watson __DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used"); 130*7527624eSRobert Watson 131*7527624eSRobert Watson /* 132*7527624eSRobert Watson * Variable exists just for reporting rss_bits in a user-friendly way. 133*7527624eSRobert Watson */ 134*7527624eSRobert Watson static u_int rss_buckets; 135*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0, 136*7527624eSRobert Watson "RSS buckets"); 137*7527624eSRobert Watson 138*7527624eSRobert Watson /* 139*7527624eSRobert Watson * Base CPU number; devices will add this to all CPU numbers returned by the 140*7527624eSRobert Watson * RSS indirection table. Currently unmodifable in FreeBSD. 141*7527624eSRobert Watson */ 142*7527624eSRobert Watson static const u_int rss_basecpu; 143*7527624eSRobert Watson SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD, 144*7527624eSRobert Watson __DECONST(int *, &rss_basecpu), 0, "RSS base CPU"); 145*7527624eSRobert Watson 146*7527624eSRobert Watson /* 147*7527624eSRobert Watson * RSS secret key, intended to prevent attacks on load-balancing. Its 148*7527624eSRobert Watson * effectiveness may be limited by algorithm choice and available entropy 149*7527624eSRobert Watson * during the boot. 150*7527624eSRobert Watson * 151*7527624eSRobert Watson * XXXRW: And that we don't randomize it yet! 152*7527624eSRobert Watson * 153*7527624eSRobert Watson * XXXRW: This default is actually the default key from Chelsio T3 cards, as 154*7527624eSRobert Watson * it offers reasonable distribution, unlike all-0 keys which always 155*7527624eSRobert Watson * generate a hash of 0 (upsettingly). 156*7527624eSRobert Watson */ 157*7527624eSRobert Watson static uint8_t rss_key[RSS_KEYSIZE] = { 158*7527624eSRobert Watson 0x43, 0xa3, 0x8f, 0xb0, 0x41, 0x67, 0x25, 0x3d, 159*7527624eSRobert Watson 0x25, 0x5b, 0x0e, 0xc2, 0x6d, 0x5a, 0x56, 0xda, 160*7527624eSRobert Watson 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 161*7527624eSRobert Watson 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 162*7527624eSRobert Watson 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 163*7527624eSRobert Watson }; 164*7527624eSRobert Watson 165*7527624eSRobert Watson /* 166*7527624eSRobert Watson * RSS hash->CPU table, which maps hashed packet headers to particular CPUs. 167*7527624eSRobert Watson * Drivers may supplement this table with a seperate CPU<->queue table when 168*7527624eSRobert Watson * programming devices. 169*7527624eSRobert Watson */ 170*7527624eSRobert Watson struct rss_table_entry { 171*7527624eSRobert Watson uint8_t rte_cpu; /* CPU affinity of bucket. */ 172*7527624eSRobert Watson }; 173*7527624eSRobert Watson static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN]; 174*7527624eSRobert Watson 175*7527624eSRobert Watson static void 176*7527624eSRobert Watson rss_init(__unused void *arg) 177*7527624eSRobert Watson { 178*7527624eSRobert Watson u_int i; 179*7527624eSRobert Watson 180*7527624eSRobert Watson /* 181*7527624eSRobert Watson * Validate tunables, coerce to sensible values. 182*7527624eSRobert Watson */ 183*7527624eSRobert Watson switch (rss_hashalgo) { 184*7527624eSRobert Watson case RSS_HASH_TOEPLITZ: 185*7527624eSRobert Watson case RSS_HASH_NAIVE: 186*7527624eSRobert Watson break; 187*7527624eSRobert Watson 188*7527624eSRobert Watson default: 189*7527624eSRobert Watson printf("%s: invalid RSS hashalgo %u, coercing to %u", 190*7527624eSRobert Watson __func__, rss_hashalgo, RSS_HASH_TOEPLITZ); 191*7527624eSRobert Watson rss_hashalgo = RSS_HASH_TOEPLITZ; 192*7527624eSRobert Watson } 193*7527624eSRobert Watson 194*7527624eSRobert Watson /* 195*7527624eSRobert Watson * Count available CPUs. 196*7527624eSRobert Watson * 197*7527624eSRobert Watson * XXXRW: Note incorrect assumptions regarding contiguity of this set 198*7527624eSRobert Watson * elsewhere. 199*7527624eSRobert Watson */ 200*7527624eSRobert Watson rss_ncpus = 0; 201*7527624eSRobert Watson for (i = 0; i <= mp_maxid; i++) { 202*7527624eSRobert Watson if (CPU_ABSENT(i)) 203*7527624eSRobert Watson continue; 204*7527624eSRobert Watson rss_ncpus++; 205*7527624eSRobert Watson } 206*7527624eSRobert Watson if (rss_ncpus > RSS_MAXCPUS) 207*7527624eSRobert Watson rss_ncpus = RSS_MAXCPUS; 208*7527624eSRobert Watson 209*7527624eSRobert Watson /* 210*7527624eSRobert Watson * Tune RSS table entries to be no less than 2x the number of CPUs 211*7527624eSRobert Watson * -- unless we're running uniprocessor, in which case there's not 212*7527624eSRobert Watson * much point in having buckets to rearrange for load-balancing! 213*7527624eSRobert Watson */ 214*7527624eSRobert Watson if (rss_ncpus > 1) { 215*7527624eSRobert Watson if (rss_bits == 0) 216*7527624eSRobert Watson rss_bits = fls(rss_ncpus - 1) + 1; 217*7527624eSRobert Watson 218*7527624eSRobert Watson /* 219*7527624eSRobert Watson * Microsoft limits RSS table entries to 128, so apply that 220*7527624eSRobert Watson * limit to both auto-detected CPU counts and user-configured 221*7527624eSRobert Watson * ones. 222*7527624eSRobert Watson */ 223*7527624eSRobert Watson if (rss_bits == 0 || rss_bits > RSS_MAXBITS) { 224*7527624eSRobert Watson printf("%s: RSS bits %u not valid, coercing to %u", 225*7527624eSRobert Watson __func__, rss_bits, RSS_MAXBITS); 226*7527624eSRobert Watson rss_bits = RSS_MAXBITS; 227*7527624eSRobert Watson } 228*7527624eSRobert Watson 229*7527624eSRobert Watson /* 230*7527624eSRobert Watson * Figure out how many buckets to use; warn if less than the 231*7527624eSRobert Watson * number of configured CPUs, although this is not a fatal 232*7527624eSRobert Watson * problem. 233*7527624eSRobert Watson */ 234*7527624eSRobert Watson rss_buckets = (1 << rss_bits); 235*7527624eSRobert Watson if (rss_buckets < rss_ncpus) 236*7527624eSRobert Watson printf("%s: WARNING: rss_buckets (%u) less than " 237*7527624eSRobert Watson "rss_ncpus (%u)\n", __func__, rss_buckets, 238*7527624eSRobert Watson rss_ncpus); 239*7527624eSRobert Watson rss_mask = rss_buckets - 1; 240*7527624eSRobert Watson } else { 241*7527624eSRobert Watson rss_bits = 0; 242*7527624eSRobert Watson rss_buckets = 1; 243*7527624eSRobert Watson rss_mask = 0; 244*7527624eSRobert Watson } 245*7527624eSRobert Watson 246*7527624eSRobert Watson /* 247*7527624eSRobert Watson * Set up initial CPU assignments: round-robin by default. 248*7527624eSRobert Watson * 249*7527624eSRobert Watson * XXXRW: Need a mapping to non-contiguous IDs here. 250*7527624eSRobert Watson */ 251*7527624eSRobert Watson for (i = 0; i < rss_buckets; i++) 252*7527624eSRobert Watson rss_table[i].rte_cpu = i % rss_ncpus; 253*7527624eSRobert Watson 254*7527624eSRobert Watson /* 255*7527624eSRobert Watson * Randomize rrs_key. 256*7527624eSRobert Watson * 257*7527624eSRobert Watson * XXXRW: Not yet. If nothing else, will require an rss_isbadkey() 258*7527624eSRobert Watson * loop to check for "bad" RSS keys. 259*7527624eSRobert Watson */ 260*7527624eSRobert Watson } 261*7527624eSRobert Watson SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL); 262*7527624eSRobert Watson 263*7527624eSRobert Watson static uint32_t 264*7527624eSRobert Watson rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen, 265*7527624eSRobert Watson const uint8_t *data) 266*7527624eSRobert Watson { 267*7527624eSRobert Watson uint32_t v; 268*7527624eSRobert Watson u_int i; 269*7527624eSRobert Watson 270*7527624eSRobert Watson v = 0; 271*7527624eSRobert Watson for (i = 0; i < keylen; i++) 272*7527624eSRobert Watson v += key[i]; 273*7527624eSRobert Watson for (i = 0; i < datalen; i++) 274*7527624eSRobert Watson v += data[i]; 275*7527624eSRobert Watson return (v); 276*7527624eSRobert Watson } 277*7527624eSRobert Watson 278*7527624eSRobert Watson static uint32_t 279*7527624eSRobert Watson rss_hash(u_int datalen, const uint8_t *data) 280*7527624eSRobert Watson { 281*7527624eSRobert Watson 282*7527624eSRobert Watson switch (rss_hashalgo) { 283*7527624eSRobert Watson case RSS_HASH_TOEPLITZ: 284*7527624eSRobert Watson return (toeplitz_hash(sizeof(rss_key), rss_key, datalen, 285*7527624eSRobert Watson data)); 286*7527624eSRobert Watson 287*7527624eSRobert Watson case RSS_HASH_NAIVE: 288*7527624eSRobert Watson return (rss_naive_hash(sizeof(rss_key), rss_key, datalen, 289*7527624eSRobert Watson data)); 290*7527624eSRobert Watson 291*7527624eSRobert Watson default: 292*7527624eSRobert Watson panic("%s: unsupported/unknown hashalgo %d", __func__, 293*7527624eSRobert Watson rss_hashalgo); 294*7527624eSRobert Watson } 295*7527624eSRobert Watson } 296*7527624eSRobert Watson 297*7527624eSRobert Watson /* 298*7527624eSRobert Watson * Hash an IPv4 2-tuple. 299*7527624eSRobert Watson */ 300*7527624eSRobert Watson uint32_t 301*7527624eSRobert Watson rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst) 302*7527624eSRobert Watson { 303*7527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 304*7527624eSRobert Watson u_int datalen; 305*7527624eSRobert Watson 306*7527624eSRobert Watson datalen = 0; 307*7527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 308*7527624eSRobert Watson datalen += sizeof(src); 309*7527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 310*7527624eSRobert Watson datalen += sizeof(dst); 311*7527624eSRobert Watson return (rss_hash(datalen, data)); 312*7527624eSRobert Watson } 313*7527624eSRobert Watson 314*7527624eSRobert Watson /* 315*7527624eSRobert Watson * Hash an IPv4 4-tuple. 316*7527624eSRobert Watson */ 317*7527624eSRobert Watson uint32_t 318*7527624eSRobert Watson rss_hash_ip4_4tuple(struct in_addr src, u_short srcport, struct in_addr dst, 319*7527624eSRobert Watson u_short dstport) 320*7527624eSRobert Watson { 321*7527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 322*7527624eSRobert Watson sizeof(dstport)]; 323*7527624eSRobert Watson u_int datalen; 324*7527624eSRobert Watson 325*7527624eSRobert Watson datalen = 0; 326*7527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 327*7527624eSRobert Watson datalen += sizeof(src); 328*7527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 329*7527624eSRobert Watson datalen += sizeof(dst); 330*7527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 331*7527624eSRobert Watson datalen += sizeof(srcport); 332*7527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 333*7527624eSRobert Watson datalen += sizeof(dstport); 334*7527624eSRobert Watson return (rss_hash(datalen, data)); 335*7527624eSRobert Watson } 336*7527624eSRobert Watson 337*7527624eSRobert Watson #ifdef INET6 338*7527624eSRobert Watson /* 339*7527624eSRobert Watson * Hash an IPv6 2-tuple. 340*7527624eSRobert Watson */ 341*7527624eSRobert Watson uint32_t 342*7527624eSRobert Watson rss_hash_ip6_2tuple(struct in6_addr src, struct in6_addr dst) 343*7527624eSRobert Watson { 344*7527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst)]; 345*7527624eSRobert Watson u_int datalen; 346*7527624eSRobert Watson 347*7527624eSRobert Watson datalen = 0; 348*7527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 349*7527624eSRobert Watson datalen += sizeof(src); 350*7527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 351*7527624eSRobert Watson datalen += sizeof(dst); 352*7527624eSRobert Watson return (rss_hash(datalen, data)); 353*7527624eSRobert Watson } 354*7527624eSRobert Watson 355*7527624eSRobert Watson /* 356*7527624eSRobert Watson * Hash an IPv6 4-tuple. 357*7527624eSRobert Watson */ 358*7527624eSRobert Watson uint32_t 359*7527624eSRobert Watson rss_hash_ip6_4tuple(struct in6_addr src, u_short srcport, 360*7527624eSRobert Watson struct in6_addr dst, u_short dstport) 361*7527624eSRobert Watson { 362*7527624eSRobert Watson uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) + 363*7527624eSRobert Watson sizeof(dstport)]; 364*7527624eSRobert Watson u_int datalen; 365*7527624eSRobert Watson 366*7527624eSRobert Watson datalen = 0; 367*7527624eSRobert Watson bcopy(&src, &data[datalen], sizeof(src)); 368*7527624eSRobert Watson datalen += sizeof(src); 369*7527624eSRobert Watson bcopy(&dst, &data[datalen], sizeof(dst)); 370*7527624eSRobert Watson datalen += sizeof(dst); 371*7527624eSRobert Watson bcopy(&srcport, &data[datalen], sizeof(srcport)); 372*7527624eSRobert Watson datalen += sizeof(srcport); 373*7527624eSRobert Watson bcopy(&dstport, &data[datalen], sizeof(dstport)); 374*7527624eSRobert Watson datalen += sizeof(dstport); 375*7527624eSRobert Watson return (rss_hash(datalen, data)); 376*7527624eSRobert Watson } 377*7527624eSRobert Watson #endif /* INET6 */ 378*7527624eSRobert Watson 379*7527624eSRobert Watson /* 380*7527624eSRobert Watson * Query the number of RSS bits in use. 381*7527624eSRobert Watson */ 382*7527624eSRobert Watson u_int 383*7527624eSRobert Watson rss_getbits(void) 384*7527624eSRobert Watson { 385*7527624eSRobert Watson 386*7527624eSRobert Watson return (rss_bits); 387*7527624eSRobert Watson } 388*7527624eSRobert Watson 389*7527624eSRobert Watson /* 390*7527624eSRobert Watson * Query the RSS bucket associated with an RSS hash. 391*7527624eSRobert Watson */ 392*7527624eSRobert Watson u_int 393*7527624eSRobert Watson rss_getbucket(u_int hash) 394*7527624eSRobert Watson { 395*7527624eSRobert Watson 396*7527624eSRobert Watson return (hash & rss_mask); 397*7527624eSRobert Watson } 398*7527624eSRobert Watson 399*7527624eSRobert Watson /* 400*7527624eSRobert Watson * Query the RSS CPU associated with an RSS bucket. 401*7527624eSRobert Watson */ 402*7527624eSRobert Watson u_int 403*7527624eSRobert Watson rss_getcpu(u_int bucket) 404*7527624eSRobert Watson { 405*7527624eSRobert Watson 406*7527624eSRobert Watson return (rss_table[bucket].rte_cpu); 407*7527624eSRobert Watson } 408*7527624eSRobert Watson 409*7527624eSRobert Watson /* 410*7527624eSRobert Watson * netisr CPU affinity lookup routine for use by protocols. 411*7527624eSRobert Watson */ 412*7527624eSRobert Watson struct mbuf * 413*7527624eSRobert Watson rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 414*7527624eSRobert Watson { 415*7527624eSRobert Watson 416*7527624eSRobert Watson M_ASSERTPKTHDR(m); 417*7527624eSRobert Watson 418*7527624eSRobert Watson switch (M_HASHTYPE_GET(m)) { 419*7527624eSRobert Watson case M_HASHTYPE_RSS_IPV4: 420*7527624eSRobert Watson case M_HASHTYPE_RSS_TCP_IPV4: 421*7527624eSRobert Watson *cpuid = rss_getcpu(rss_getbucket(m->m_pkthdr.flowid)); 422*7527624eSRobert Watson return (m); 423*7527624eSRobert Watson 424*7527624eSRobert Watson default: 425*7527624eSRobert Watson *cpuid = NETISR_CPUID_NONE; 426*7527624eSRobert Watson return (m); 427*7527624eSRobert Watson } 428*7527624eSRobert Watson } 429*7527624eSRobert Watson 430*7527624eSRobert Watson /* 431*7527624eSRobert Watson * Query the RSS hash algorithm. 432*7527624eSRobert Watson */ 433*7527624eSRobert Watson u_int 434*7527624eSRobert Watson rss_gethashalgo(void) 435*7527624eSRobert Watson { 436*7527624eSRobert Watson 437*7527624eSRobert Watson return (rss_hashalgo); 438*7527624eSRobert Watson } 439*7527624eSRobert Watson 440*7527624eSRobert Watson /* 441*7527624eSRobert Watson * Query the current RSS key; likely to be used by device drivers when 442*7527624eSRobert Watson * configuring hardware RSS. Caller must pass an array of size RSS_KEYSIZE. 443*7527624eSRobert Watson * 444*7527624eSRobert Watson * XXXRW: Perhaps we should do the accept-a-length-and-truncate thing? 445*7527624eSRobert Watson */ 446*7527624eSRobert Watson void 447*7527624eSRobert Watson rss_getkey(uint8_t *key) 448*7527624eSRobert Watson { 449*7527624eSRobert Watson 450*7527624eSRobert Watson bcopy(rss_key, key, sizeof(rss_key)); 451*7527624eSRobert Watson } 452*7527624eSRobert Watson 453*7527624eSRobert Watson /* 454*7527624eSRobert Watson * Query the number of buckets; this may be used by both network device 455*7527624eSRobert Watson * drivers, which will need to populate hardware shadows of the software 456*7527624eSRobert Watson * indirection table, and the network stack itself (such as when deciding how 457*7527624eSRobert Watson * many connection groups to allocate). 458*7527624eSRobert Watson */ 459*7527624eSRobert Watson u_int 460*7527624eSRobert Watson rss_getnumbuckets(void) 461*7527624eSRobert Watson { 462*7527624eSRobert Watson 463*7527624eSRobert Watson return (rss_buckets); 464*7527624eSRobert Watson } 465*7527624eSRobert Watson 466*7527624eSRobert Watson /* 467*7527624eSRobert Watson * Query the number of CPUs in use by RSS; may be useful to device drivers 468*7527624eSRobert Watson * trying to figure out how to map a larger number of CPUs into a smaller 469*7527624eSRobert Watson * number of receive queues. 470*7527624eSRobert Watson */ 471*7527624eSRobert Watson u_int 472*7527624eSRobert Watson rss_getnumcpus(void) 473*7527624eSRobert Watson { 474*7527624eSRobert Watson 475*7527624eSRobert Watson return (rss_ncpus); 476*7527624eSRobert Watson } 477*7527624eSRobert Watson 478*7527624eSRobert Watson /* 479*7527624eSRobert Watson * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want 480*7527624eSRobert Watson * it appearing in debugging output unnecessarily. 481*7527624eSRobert Watson */ 482*7527624eSRobert Watson static int 483*7527624eSRobert Watson sysctl_rss_key(SYSCTL_HANDLER_ARGS) 484*7527624eSRobert Watson { 485*7527624eSRobert Watson uint8_t temp_rss_key[RSS_KEYSIZE]; 486*7527624eSRobert Watson int error; 487*7527624eSRobert Watson 488*7527624eSRobert Watson error = priv_check(req->td, PRIV_NETINET_HASHKEY); 489*7527624eSRobert Watson if (error) 490*7527624eSRobert Watson return (error); 491*7527624eSRobert Watson 492*7527624eSRobert Watson bcopy(rss_key, temp_rss_key, sizeof(temp_rss_key)); 493*7527624eSRobert Watson error = sysctl_handle_opaque(oidp, temp_rss_key, 494*7527624eSRobert Watson sizeof(temp_rss_key), req); 495*7527624eSRobert Watson if (error) 496*7527624eSRobert Watson return (error); 497*7527624eSRobert Watson if (req->newptr != NULL) { 498*7527624eSRobert Watson /* XXXRW: Not yet. */ 499*7527624eSRobert Watson return (EINVAL); 500*7527624eSRobert Watson } 501*7527624eSRobert Watson return (0); 502*7527624eSRobert Watson } 503*7527624eSRobert Watson SYSCTL_PROC(_net_inet_rss, OID_AUTO, key, 504*7527624eSRobert Watson CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key, 505*7527624eSRobert Watson "", "RSS keying material"); 506