/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler leads to code reentrance
 *   or lock recursion, such as entering the socket code from the socket
 *   code.
 * - Whether directly dispatching a netisr handler leads to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
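 *
 * As an illustrative sketch only (the "foo" protocol constant, handler and
 * queue limit below are hypothetical), a protocol registers a handler once
 * at initialization time and then feeds packets through the dispatch
 * interfaces:
 *
 *	static const struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_qlimit = 256,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *	};
 *
 *	netisr_register(&foo_nh);
 *	...
 *	error = netisr_dispatch(NETISR_FOO, m);	(may call foo_input() directly)
 *	error = netisr_queue(NETISR_FOO, m);	(always defers to a SWI thread)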
 */

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols, and
 * a read lock while running handlers, so that partially registered or
 * unregistered protocols are never run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_worker.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
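/*
 * The global dispatch policy may be set as a loader tunable or changed at
 * runtime through the net.isr.dispatch sysctl; the accepted string values
 * are "deferred", "hybrid" and "direct".  For example (illustrative only):
 *
 *	net.isr.dispatch="hybrid"		(in loader.conf)
 *	sysctl net.isr.dispatch=deferred	(at runtime)
 */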
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0.  This must be set at boot.  We will create at most one thread per
 * CPU.  By default we initialize this to 1, which assigns just one CPU
 * (CPU 0) and therefore a single workstream.  If set to -1, netisr uses all
 * CPUs (mp_ncpus), with one workstream per thread (CPU).
 */
static int netisr_maxthreads = 1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this
 * is set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

#ifdef VIMAGE
/*
 * The netisr_enable array describes a per-VNET flag for registered
 * protocols on whether this netisr is active in this VNET or not.
 * netisr_register() will automatically enable the netisr for the
 * default VNET and all currently active instances.
 * netisr_unregister() will disable all active VNETs, including vnet0.
 * Individual network stack instances can be enabled/disabled by the
 * netisr_(un)register_vnet() functions.
 * With this we keep the one netisr_proto per protocol but add a
 * mechanism to stop netisr processing for vnet teardown.
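 * A VNET-aware protocol would typically pair these calls with its per-VNET
 * constructor and destructor, for example (illustrative only, reusing the
 * hypothetical handler from the overview comment above):
 *
 *	netisr_register_vnet(&foo_nh);		(per-VNET initialization)
 *	netisr_unregister_vnet(&foo_nh);	(per-VNET teardown)
 *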
 * Apart from that we expect a VNET to always be enabled.
 */
VNET_DEFINE_STATIC(u_int, netisr_enable[NETISR_MAXPROT]);
#define	V_netisr_enable		VNET(netisr_enable)
#endif

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	return (nws_array[cpunumber % nws_count]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}

/*
 * Dispatch tunable and sysctl configuration.
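 *
 * The table below maps the policy strings "default", "deferred", "hybrid"
 * and "direct" to their NETISR_DISPATCH_* values; note that "default" is
 * only meaningful as a per-protocol setting and is rejected as a value for
 * the global net.isr.dispatch sysctl.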
298 */ 299 struct netisr_dispatch_table_entry { 300 u_int ndte_policy; 301 const char *ndte_policy_str; 302 }; 303 static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = { 304 { NETISR_DISPATCH_DEFAULT, "default" }, 305 { NETISR_DISPATCH_DEFERRED, "deferred" }, 306 { NETISR_DISPATCH_HYBRID, "hybrid" }, 307 { NETISR_DISPATCH_DIRECT, "direct" }, 308 }; 309 310 static void 311 netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, 312 u_int buflen) 313 { 314 const struct netisr_dispatch_table_entry *ndtep; 315 const char *str; 316 u_int i; 317 318 str = "unknown"; 319 for (i = 0; i < nitems(netisr_dispatch_table); i++) { 320 ndtep = &netisr_dispatch_table[i]; 321 if (ndtep->ndte_policy == dispatch_policy) { 322 str = ndtep->ndte_policy_str; 323 break; 324 } 325 } 326 snprintf(buffer, buflen, "%s", str); 327 } 328 329 static int 330 netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) 331 { 332 const struct netisr_dispatch_table_entry *ndtep; 333 u_int i; 334 335 for (i = 0; i < nitems(netisr_dispatch_table); i++) { 336 ndtep = &netisr_dispatch_table[i]; 337 if (strcmp(ndtep->ndte_policy_str, str) == 0) { 338 *dispatch_policyp = ndtep->ndte_policy; 339 return (0); 340 } 341 } 342 return (EINVAL); 343 } 344 345 static int 346 sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) 347 { 348 char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; 349 size_t len; 350 u_int dispatch_policy; 351 int error; 352 353 netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp, 354 sizeof(tmp)); 355 /* 356 * netisr is initialised very early during the boot when malloc isn't 357 * available yet so we can't use sysctl_handle_string() to process 358 * any non-default value that was potentially set via loader. 359 */ 360 if (req->newptr != NULL) { 361 len = req->newlen - req->newidx; 362 if (len >= NETISR_DISPATCH_POLICY_MAXSTR) 363 return (EINVAL); 364 error = SYSCTL_IN(req, tmp, len); 365 if (error == 0) { 366 tmp[len] = '\0'; 367 error = netisr_dispatch_policy_from_str(tmp, 368 &dispatch_policy); 369 if (error == 0 && 370 dispatch_policy == NETISR_DISPATCH_DEFAULT) 371 error = EINVAL; 372 if (error == 0) 373 netisr_dispatch_policy = dispatch_policy; 374 } 375 } else { 376 error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req); 377 } 378 return (error); 379 } 380 381 /* 382 * Register a new netisr handler, which requires initializing per-protocol 383 * fields for each workstream. All netisr work is briefly suspended while 384 * the protocol is installed. 385 */ 386 void 387 netisr_register(const struct netisr_handler *nhp) 388 { 389 VNET_ITERATOR_DECL(vnet_iter); 390 struct netisr_work *npwp; 391 const char *name; 392 u_int i, proto; 393 394 proto = nhp->nh_proto; 395 name = nhp->nh_name; 396 397 /* 398 * Test that the requested registration is valid. 
399 */ 400 KASSERT(nhp->nh_name != NULL, 401 ("%s: nh_name NULL for %u", __func__, proto)); 402 KASSERT(nhp->nh_handler != NULL, 403 ("%s: nh_handler NULL for %s", __func__, name)); 404 KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE || 405 nhp->nh_policy == NETISR_POLICY_FLOW || 406 nhp->nh_policy == NETISR_POLICY_CPU, 407 ("%s: unsupported nh_policy %u for %s", __func__, 408 nhp->nh_policy, name)); 409 KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW || 410 nhp->nh_m2flow == NULL, 411 ("%s: nh_policy != FLOW but m2flow defined for %s", __func__, 412 name)); 413 KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL, 414 ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__, 415 name)); 416 KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, 417 ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, 418 name)); 419 KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT || 420 nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED || 421 nhp->nh_dispatch == NETISR_DISPATCH_HYBRID || 422 nhp->nh_dispatch == NETISR_DISPATCH_DIRECT, 423 ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch)); 424 425 KASSERT(proto < NETISR_MAXPROT, 426 ("%s(%u, %s): protocol too big", __func__, proto, name)); 427 428 /* 429 * Test that no existing registration exists for this protocol. 430 */ 431 NETISR_WLOCK(); 432 KASSERT(netisr_proto[proto].np_name == NULL, 433 ("%s(%u, %s): name present", __func__, proto, name)); 434 KASSERT(netisr_proto[proto].np_handler == NULL, 435 ("%s(%u, %s): handler present", __func__, proto, name)); 436 437 netisr_proto[proto].np_name = name; 438 netisr_proto[proto].np_handler = nhp->nh_handler; 439 netisr_proto[proto].np_m2flow = nhp->nh_m2flow; 440 netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid; 441 netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu; 442 if (nhp->nh_qlimit == 0) 443 netisr_proto[proto].np_qlimit = netisr_defaultqlimit; 444 else if (nhp->nh_qlimit > netisr_maxqlimit) { 445 printf("%s: %s requested queue limit %u capped to " 446 "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit, 447 netisr_maxqlimit); 448 netisr_proto[proto].np_qlimit = netisr_maxqlimit; 449 } else 450 netisr_proto[proto].np_qlimit = nhp->nh_qlimit; 451 netisr_proto[proto].np_policy = nhp->nh_policy; 452 netisr_proto[proto].np_dispatch = nhp->nh_dispatch; 453 CPU_FOREACH(i) { 454 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 455 bzero(npwp, sizeof(*npwp)); 456 npwp->nw_qlimit = netisr_proto[proto].np_qlimit; 457 } 458 459 #ifdef VIMAGE 460 /* 461 * Test that we are in vnet0 and have a curvnet set. 462 */ 463 KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); 464 KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p", 465 __func__, curvnet, vnet0)); 466 VNET_LIST_RLOCK_NOSLEEP(); 467 VNET_FOREACH(vnet_iter) { 468 CURVNET_SET(vnet_iter); 469 V_netisr_enable[proto] = 1; 470 CURVNET_RESTORE(); 471 } 472 VNET_LIST_RUNLOCK_NOSLEEP(); 473 #endif 474 NETISR_WUNLOCK(); 475 } 476 477 /* 478 * Clear drop counters across all workstreams for a protocol. 
479 */ 480 void 481 netisr_clearqdrops(const struct netisr_handler *nhp) 482 { 483 struct netisr_work *npwp; 484 #ifdef INVARIANTS 485 const char *name; 486 #endif 487 u_int i, proto; 488 489 proto = nhp->nh_proto; 490 #ifdef INVARIANTS 491 name = nhp->nh_name; 492 #endif 493 KASSERT(proto < NETISR_MAXPROT, 494 ("%s(%u): protocol too big for %s", __func__, proto, name)); 495 496 NETISR_WLOCK(); 497 KASSERT(netisr_proto[proto].np_handler != NULL, 498 ("%s(%u): protocol not registered for %s", __func__, proto, 499 name)); 500 501 CPU_FOREACH(i) { 502 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 503 npwp->nw_qdrops = 0; 504 } 505 NETISR_WUNLOCK(); 506 } 507 508 /* 509 * Query current drop counters across all workstreams for a protocol. 510 */ 511 void 512 netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp) 513 { 514 struct netisr_work *npwp; 515 struct rm_priotracker tracker; 516 #ifdef INVARIANTS 517 const char *name; 518 #endif 519 u_int i, proto; 520 521 *qdropp = 0; 522 proto = nhp->nh_proto; 523 #ifdef INVARIANTS 524 name = nhp->nh_name; 525 #endif 526 KASSERT(proto < NETISR_MAXPROT, 527 ("%s(%u): protocol too big for %s", __func__, proto, name)); 528 529 NETISR_RLOCK(&tracker); 530 KASSERT(netisr_proto[proto].np_handler != NULL, 531 ("%s(%u): protocol not registered for %s", __func__, proto, 532 name)); 533 534 CPU_FOREACH(i) { 535 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 536 *qdropp += npwp->nw_qdrops; 537 } 538 NETISR_RUNLOCK(&tracker); 539 } 540 541 /* 542 * Query current per-workstream queue limit for a protocol. 543 */ 544 void 545 netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp) 546 { 547 struct rm_priotracker tracker; 548 #ifdef INVARIANTS 549 const char *name; 550 #endif 551 u_int proto; 552 553 proto = nhp->nh_proto; 554 #ifdef INVARIANTS 555 name = nhp->nh_name; 556 #endif 557 KASSERT(proto < NETISR_MAXPROT, 558 ("%s(%u): protocol too big for %s", __func__, proto, name)); 559 560 NETISR_RLOCK(&tracker); 561 KASSERT(netisr_proto[proto].np_handler != NULL, 562 ("%s(%u): protocol not registered for %s", __func__, proto, 563 name)); 564 *qlimitp = netisr_proto[proto].np_qlimit; 565 NETISR_RUNLOCK(&tracker); 566 } 567 568 /* 569 * Update the queue limit across per-workstream queues for a protocol. We 570 * simply change the limits, and don't drain overflowed packets as they will 571 * (hopefully) take care of themselves shortly. 572 */ 573 int 574 netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit) 575 { 576 struct netisr_work *npwp; 577 #ifdef INVARIANTS 578 const char *name; 579 #endif 580 u_int i, proto; 581 582 if (qlimit > netisr_maxqlimit) 583 return (EINVAL); 584 585 proto = nhp->nh_proto; 586 #ifdef INVARIANTS 587 name = nhp->nh_name; 588 #endif 589 KASSERT(proto < NETISR_MAXPROT, 590 ("%s(%u): protocol too big for %s", __func__, proto, name)); 591 592 NETISR_WLOCK(); 593 KASSERT(netisr_proto[proto].np_handler != NULL, 594 ("%s(%u): protocol not registered for %s", __func__, proto, 595 name)); 596 597 netisr_proto[proto].np_qlimit = qlimit; 598 CPU_FOREACH(i) { 599 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 600 npwp->nw_qlimit = qlimit; 601 } 602 NETISR_WUNLOCK(); 603 return (0); 604 } 605 606 /* 607 * Drain all packets currently held in a particular protocol work queue. 608 */ 609 static void 610 netisr_drain_proto(struct netisr_work *npwp) 611 { 612 struct mbuf *m; 613 614 /* 615 * We would assert the lock on the workstream but it's not passed in. 
616 */ 617 while ((m = npwp->nw_head) != NULL) { 618 npwp->nw_head = m->m_nextpkt; 619 m->m_nextpkt = NULL; 620 if (npwp->nw_head == NULL) 621 npwp->nw_tail = NULL; 622 npwp->nw_len--; 623 m_freem(m); 624 } 625 KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__)); 626 KASSERT(npwp->nw_len == 0, ("%s: len", __func__)); 627 } 628 629 /* 630 * Remove the registration of a network protocol, which requires clearing 631 * per-protocol fields across all workstreams, including freeing all mbufs in 632 * the queues at time of unregister. All work in netisr is briefly suspended 633 * while this takes place. 634 */ 635 void 636 netisr_unregister(const struct netisr_handler *nhp) 637 { 638 VNET_ITERATOR_DECL(vnet_iter); 639 struct netisr_work *npwp; 640 #ifdef INVARIANTS 641 const char *name; 642 #endif 643 u_int i, proto; 644 645 proto = nhp->nh_proto; 646 #ifdef INVARIANTS 647 name = nhp->nh_name; 648 #endif 649 KASSERT(proto < NETISR_MAXPROT, 650 ("%s(%u): protocol too big for %s", __func__, proto, name)); 651 652 NETISR_WLOCK(); 653 KASSERT(netisr_proto[proto].np_handler != NULL, 654 ("%s(%u): protocol not registered for %s", __func__, proto, 655 name)); 656 657 #ifdef VIMAGE 658 VNET_LIST_RLOCK_NOSLEEP(); 659 VNET_FOREACH(vnet_iter) { 660 CURVNET_SET(vnet_iter); 661 V_netisr_enable[proto] = 0; 662 CURVNET_RESTORE(); 663 } 664 VNET_LIST_RUNLOCK_NOSLEEP(); 665 #endif 666 667 netisr_proto[proto].np_name = NULL; 668 netisr_proto[proto].np_handler = NULL; 669 netisr_proto[proto].np_m2flow = NULL; 670 netisr_proto[proto].np_m2cpuid = NULL; 671 netisr_proto[proto].np_qlimit = 0; 672 netisr_proto[proto].np_policy = 0; 673 CPU_FOREACH(i) { 674 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 675 netisr_drain_proto(npwp); 676 bzero(npwp, sizeof(*npwp)); 677 } 678 NETISR_WUNLOCK(); 679 } 680 681 #ifdef VIMAGE 682 void 683 netisr_register_vnet(const struct netisr_handler *nhp) 684 { 685 u_int proto; 686 687 proto = nhp->nh_proto; 688 689 KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); 690 KASSERT(proto < NETISR_MAXPROT, 691 ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); 692 NETISR_WLOCK(); 693 KASSERT(netisr_proto[proto].np_handler != NULL, 694 ("%s(%u): protocol not registered for %s", __func__, proto, 695 nhp->nh_name)); 696 697 V_netisr_enable[proto] = 1; 698 NETISR_WUNLOCK(); 699 } 700 701 static void 702 netisr_drain_proto_vnet(struct vnet *vnet, u_int proto) 703 { 704 struct epoch_tracker et; 705 struct netisr_workstream *nwsp; 706 struct netisr_work *npwp; 707 struct mbuf *m, *mp, *n, *ne; 708 struct ifnet *ifp; 709 u_int i; 710 711 KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__)); 712 NETISR_LOCK_ASSERT(); 713 714 CPU_FOREACH(i) { 715 nwsp = DPCPU_ID_PTR(i, nws); 716 if (nwsp->nws_intr_event == NULL) 717 continue; 718 npwp = &nwsp->nws_work[proto]; 719 NWS_LOCK(nwsp); 720 721 /* 722 * Rather than dissecting and removing mbufs from the middle 723 * of the chain, we build a new chain if the packet stays and 724 * update the head and tail pointers at the end. All packets 725 * matching the given vnet are freed. 
726 */ 727 m = npwp->nw_head; 728 n = ne = NULL; 729 NET_EPOCH_ENTER(et); 730 while (m != NULL) { 731 mp = m; 732 m = m->m_nextpkt; 733 mp->m_nextpkt = NULL; 734 if ((ifp = ifnet_byindexgen(mp->m_pkthdr.rcvidx, 735 mp->m_pkthdr.rcvgen)) != NULL && 736 ifp->if_vnet != vnet) { 737 if (n == NULL) { 738 n = ne = mp; 739 } else { 740 ne->m_nextpkt = mp; 741 ne = mp; 742 } 743 continue; 744 } 745 /* This is a packet in the selected vnet, or belongs 746 to destroyed interface. Free it. */ 747 npwp->nw_len--; 748 m_freem(mp); 749 } 750 NET_EPOCH_EXIT(et); 751 npwp->nw_head = n; 752 npwp->nw_tail = ne; 753 NWS_UNLOCK(nwsp); 754 } 755 } 756 757 void 758 netisr_unregister_vnet(const struct netisr_handler *nhp) 759 { 760 u_int proto; 761 762 proto = nhp->nh_proto; 763 764 KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); 765 KASSERT(proto < NETISR_MAXPROT, 766 ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); 767 NETISR_WLOCK(); 768 KASSERT(netisr_proto[proto].np_handler != NULL, 769 ("%s(%u): protocol not registered for %s", __func__, proto, 770 nhp->nh_name)); 771 772 V_netisr_enable[proto] = 0; 773 774 netisr_drain_proto_vnet(curvnet, proto); 775 NETISR_WUNLOCK(); 776 } 777 #endif 778 779 /* 780 * Compose the global and per-protocol policies on dispatch, and return the 781 * dispatch policy to use. 782 */ 783 static u_int 784 netisr_get_dispatch(struct netisr_proto *npp) 785 { 786 787 /* 788 * Protocol-specific configuration overrides the global default. 789 */ 790 if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT) 791 return (npp->np_dispatch); 792 return (netisr_dispatch_policy); 793 } 794 795 /* 796 * Look up the workstream given a packet and source identifier. Do this by 797 * checking the protocol's policy, and optionally call out to the protocol 798 * for assistance if required. 799 */ 800 static struct mbuf * 801 netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy, 802 uintptr_t source, struct mbuf *m, u_int *cpuidp) 803 { 804 struct ifnet *ifp; 805 u_int policy; 806 807 NETISR_LOCK_ASSERT(); 808 809 /* 810 * In the event we have only one worker, shortcut and deliver to it 811 * without further ado. 812 */ 813 if (nws_count == 1) { 814 *cpuidp = nws_array[0]; 815 return (m); 816 } 817 818 /* 819 * What happens next depends on the policy selected by the protocol. 820 * If we want to support per-interface policies, we should do that 821 * here first. 822 */ 823 policy = npp->np_policy; 824 if (policy == NETISR_POLICY_CPU) { 825 m = npp->np_m2cpuid(m, source, cpuidp); 826 if (m == NULL) 827 return (NULL); 828 829 /* 830 * It's possible for a protocol not to have a good idea about 831 * where to process a packet, in which case we fall back on 832 * the netisr code to decide. In the hybrid case, return the 833 * current CPU ID, which will force an immediate direct 834 * dispatch. In the queued case, fall back on the SOURCE 835 * policy. 
836 */ 837 if (*cpuidp != NETISR_CPUID_NONE) { 838 *cpuidp = netisr_get_cpuid(*cpuidp); 839 return (m); 840 } 841 if (dispatch_policy == NETISR_DISPATCH_HYBRID) { 842 *cpuidp = netisr_get_cpuid(curcpu); 843 return (m); 844 } 845 policy = NETISR_POLICY_SOURCE; 846 } 847 848 if (policy == NETISR_POLICY_FLOW) { 849 if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE && 850 npp->np_m2flow != NULL) { 851 m = npp->np_m2flow(m, source); 852 if (m == NULL) 853 return (NULL); 854 } 855 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 856 *cpuidp = 857 netisr_default_flow2cpu(m->m_pkthdr.flowid); 858 return (m); 859 } 860 policy = NETISR_POLICY_SOURCE; 861 } 862 863 KASSERT(policy == NETISR_POLICY_SOURCE, 864 ("%s: invalid policy %u for %s", __func__, npp->np_policy, 865 npp->np_name)); 866 867 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 868 ifp = m->m_pkthdr.rcvif; 869 if (ifp != NULL) 870 *cpuidp = nws_array[(ifp->if_index + source) % nws_count]; 871 else 872 *cpuidp = nws_array[source % nws_count]; 873 return (m); 874 } 875 876 /* 877 * Process packets associated with a workstream and protocol. For reasons of 878 * fairness, we process up to one complete netisr queue at a time, moving the 879 * queue to a stack-local queue for processing, but do not loop refreshing 880 * from the global queue. The caller is responsible for deciding whether to 881 * loop, and for setting the NWS_RUNNING flag. The passed workstream will be 882 * locked on entry and relocked before return, but will be released while 883 * processing. The number of packets processed is returned. 884 */ 885 static u_int 886 netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) 887 { 888 struct netisr_work local_npw, *npwp; 889 u_int handled; 890 struct mbuf *m; 891 892 NETISR_LOCK_ASSERT(); 893 NWS_LOCK_ASSERT(nwsp); 894 895 KASSERT(nwsp->nws_flags & NWS_RUNNING, 896 ("%s(%u): not running", __func__, proto)); 897 KASSERT(proto >= 0 && proto < NETISR_MAXPROT, 898 ("%s(%u): invalid proto\n", __func__, proto)); 899 900 npwp = &nwsp->nws_work[proto]; 901 if (npwp->nw_len == 0) 902 return (0); 903 904 /* 905 * Move the global work queue to a thread-local work queue. 906 * 907 * Notice that this means the effective maximum length of the queue 908 * is actually twice that of the maximum queue length specified in 909 * the protocol registration call. 910 */ 911 handled = npwp->nw_len; 912 local_npw = *npwp; 913 npwp->nw_head = NULL; 914 npwp->nw_tail = NULL; 915 npwp->nw_len = 0; 916 nwsp->nws_pendingbits &= ~(1 << proto); 917 NWS_UNLOCK(nwsp); 918 while ((m = local_npw.nw_head) != NULL) { 919 local_npw.nw_head = m->m_nextpkt; 920 m->m_nextpkt = NULL; 921 if (local_npw.nw_head == NULL) 922 local_npw.nw_tail = NULL; 923 local_npw.nw_len--; 924 if (__predict_false(m_rcvif_restore(m) == NULL)) { 925 m_freem(m); 926 continue; 927 } 928 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 929 netisr_proto[proto].np_handler(m); 930 CURVNET_RESTORE(); 931 } 932 KASSERT(local_npw.nw_len == 0, 933 ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); 934 if (netisr_proto[proto].np_drainedcpu) 935 netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu); 936 NWS_LOCK(nwsp); 937 npwp->nw_handled += handled; 938 return (handled); 939 } 940 941 /* 942 * SWI handler for netisr -- processes packets in a set of workstreams that 943 * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already 944 * being direct dispatched, go back to sleep and wait for the dispatching 945 * thread to wake us up again. 
946 */ 947 static void 948 swi_net(void *arg) 949 { 950 #ifdef NETISR_LOCKING 951 struct rm_priotracker tracker; 952 #endif 953 struct netisr_workstream *nwsp; 954 u_int bits, prot; 955 956 nwsp = arg; 957 958 #ifdef DEVICE_POLLING 959 KASSERT(nws_count == 1, 960 ("%s: device_polling but nws_count != 1", __func__)); 961 netisr_poll(); 962 #endif 963 #ifdef NETISR_LOCKING 964 NETISR_RLOCK(&tracker); 965 #endif 966 NWS_LOCK(nwsp); 967 KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running")); 968 if (nwsp->nws_flags & NWS_DISPATCHING) 969 goto out; 970 nwsp->nws_flags |= NWS_RUNNING; 971 nwsp->nws_flags &= ~NWS_SCHEDULED; 972 while ((bits = nwsp->nws_pendingbits) != 0) { 973 while ((prot = ffs(bits)) != 0) { 974 prot--; 975 bits &= ~(1 << prot); 976 (void)netisr_process_workstream_proto(nwsp, prot); 977 } 978 } 979 nwsp->nws_flags &= ~NWS_RUNNING; 980 out: 981 NWS_UNLOCK(nwsp); 982 #ifdef NETISR_LOCKING 983 NETISR_RUNLOCK(&tracker); 984 #endif 985 #ifdef DEVICE_POLLING 986 netisr_pollmore(); 987 #endif 988 } 989 990 static int 991 netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto, 992 struct netisr_work *npwp, struct mbuf *m, int *dosignalp) 993 { 994 995 NWS_LOCK_ASSERT(nwsp); 996 997 *dosignalp = 0; 998 if (npwp->nw_len < npwp->nw_qlimit) { 999 m_rcvif_serialize(m); 1000 m->m_nextpkt = NULL; 1001 if (npwp->nw_head == NULL) { 1002 npwp->nw_head = m; 1003 npwp->nw_tail = m; 1004 } else { 1005 npwp->nw_tail->m_nextpkt = m; 1006 npwp->nw_tail = m; 1007 } 1008 npwp->nw_len++; 1009 if (npwp->nw_len > npwp->nw_watermark) 1010 npwp->nw_watermark = npwp->nw_len; 1011 1012 /* 1013 * We must set the bit regardless of NWS_RUNNING, so that 1014 * swi_net() keeps calling netisr_process_workstream_proto(). 1015 */ 1016 nwsp->nws_pendingbits |= (1 << proto); 1017 if (!(nwsp->nws_flags & 1018 (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) { 1019 nwsp->nws_flags |= NWS_SCHEDULED; 1020 *dosignalp = 1; /* Defer until unlocked. 
*/ 1021 } 1022 npwp->nw_queued++; 1023 return (0); 1024 } else { 1025 m_freem(m); 1026 npwp->nw_qdrops++; 1027 return (ENOBUFS); 1028 } 1029 } 1030 1031 static int 1032 netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid) 1033 { 1034 struct netisr_workstream *nwsp; 1035 struct netisr_work *npwp; 1036 int dosignal, error; 1037 1038 #ifdef NETISR_LOCKING 1039 NETISR_LOCK_ASSERT(); 1040 #endif 1041 KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__, 1042 cpuid, mp_maxid)); 1043 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1044 1045 dosignal = 0; 1046 error = 0; 1047 nwsp = DPCPU_ID_PTR(cpuid, nws); 1048 npwp = &nwsp->nws_work[proto]; 1049 NWS_LOCK(nwsp); 1050 error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal); 1051 NWS_UNLOCK(nwsp); 1052 if (dosignal) 1053 NWS_SIGNAL(nwsp); 1054 return (error); 1055 } 1056 1057 int 1058 netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) 1059 { 1060 #ifdef NETISR_LOCKING 1061 struct rm_priotracker tracker; 1062 #endif 1063 u_int cpuid; 1064 int error; 1065 1066 KASSERT(proto < NETISR_MAXPROT, 1067 ("%s: invalid proto %u", __func__, proto)); 1068 1069 #ifdef NETISR_LOCKING 1070 NETISR_RLOCK(&tracker); 1071 #endif 1072 KASSERT(netisr_proto[proto].np_handler != NULL, 1073 ("%s: invalid proto %u", __func__, proto)); 1074 1075 #ifdef VIMAGE 1076 if (V_netisr_enable[proto] == 0) { 1077 m_freem(m); 1078 return (ENOPROTOOPT); 1079 } 1080 #endif 1081 1082 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED, 1083 source, m, &cpuid); 1084 if (m != NULL) { 1085 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, 1086 cpuid)); 1087 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 1088 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 1089 error = netisr_queue_internal(proto, m, cpuid); 1090 } else 1091 error = ENOBUFS; 1092 #ifdef NETISR_LOCKING 1093 NETISR_RUNLOCK(&tracker); 1094 #endif 1095 return (error); 1096 } 1097 1098 int 1099 netisr_queue(u_int proto, struct mbuf *m) 1100 { 1101 1102 return (netisr_queue_src(proto, 0, m)); 1103 } 1104 1105 /* 1106 * Dispatch a packet for netisr processing; direct dispatch is permitted by 1107 * calling context. 1108 */ 1109 int 1110 netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) 1111 { 1112 #ifdef NETISR_LOCKING 1113 struct rm_priotracker tracker; 1114 #endif 1115 struct netisr_workstream *nwsp; 1116 struct netisr_proto *npp; 1117 struct netisr_work *npwp; 1118 int dosignal, error; 1119 u_int cpuid, dispatch_policy; 1120 1121 NET_EPOCH_ASSERT(); 1122 KASSERT(proto < NETISR_MAXPROT, 1123 ("%s: invalid proto %u", __func__, proto)); 1124 #ifdef NETISR_LOCKING 1125 NETISR_RLOCK(&tracker); 1126 #endif 1127 npp = &netisr_proto[proto]; 1128 KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, 1129 proto)); 1130 1131 #ifdef VIMAGE 1132 if (V_netisr_enable[proto] == 0) { 1133 m_freem(m); 1134 return (ENOPROTOOPT); 1135 } 1136 #endif 1137 1138 dispatch_policy = netisr_get_dispatch(npp); 1139 if (dispatch_policy == NETISR_DISPATCH_DEFERRED) 1140 return (netisr_queue_src(proto, source, m)); 1141 1142 /* 1143 * If direct dispatch is forced, then unconditionally dispatch 1144 * without a formal CPU selection. Borrow the current CPU's stats, 1145 * even if there's no worker on it. In this case we don't update 1146 * nws_flags because all netisr processing will be source ordered due 1147 * to always being forced to directly dispatch. 
1148 */ 1149 if (dispatch_policy == NETISR_DISPATCH_DIRECT) { 1150 nwsp = DPCPU_PTR(nws); 1151 npwp = &nwsp->nws_work[proto]; 1152 npwp->nw_dispatched++; 1153 npwp->nw_handled++; 1154 netisr_proto[proto].np_handler(m); 1155 error = 0; 1156 goto out_unlock; 1157 } 1158 1159 KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID, 1160 ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy)); 1161 1162 /* 1163 * Otherwise, we execute in a hybrid mode where we will try to direct 1164 * dispatch if we're on the right CPU and the netisr worker isn't 1165 * already running. 1166 */ 1167 sched_pin(); 1168 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID, 1169 source, m, &cpuid); 1170 if (m == NULL) { 1171 error = ENOBUFS; 1172 goto out_unpin; 1173 } 1174 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1175 if (cpuid != curcpu) 1176 goto queue_fallback; 1177 nwsp = DPCPU_PTR(nws); 1178 npwp = &nwsp->nws_work[proto]; 1179 1180 /*- 1181 * We are willing to direct dispatch only if three conditions hold: 1182 * 1183 * (1) The netisr worker isn't already running, 1184 * (2) Another thread isn't already directly dispatching, and 1185 * (3) The netisr hasn't already been woken up. 1186 */ 1187 NWS_LOCK(nwsp); 1188 if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) { 1189 error = netisr_queue_workstream(nwsp, proto, npwp, m, 1190 &dosignal); 1191 NWS_UNLOCK(nwsp); 1192 if (dosignal) 1193 NWS_SIGNAL(nwsp); 1194 goto out_unpin; 1195 } 1196 1197 /* 1198 * The current thread is now effectively the netisr worker, so set 1199 * the dispatching flag to prevent concurrent processing of the 1200 * stream from another thread (even the netisr worker), which could 1201 * otherwise lead to effective misordering of the stream. 1202 */ 1203 nwsp->nws_flags |= NWS_DISPATCHING; 1204 NWS_UNLOCK(nwsp); 1205 netisr_proto[proto].np_handler(m); 1206 NWS_LOCK(nwsp); 1207 nwsp->nws_flags &= ~NWS_DISPATCHING; 1208 npwp->nw_handled++; 1209 npwp->nw_hybrid_dispatched++; 1210 1211 /* 1212 * If other work was enqueued by another thread while we were direct 1213 * dispatching, we need to signal the netisr worker to do that work. 1214 * In the future, we might want to do some of that work in the 1215 * current thread, rather than trigger further context switches. If 1216 * so, we'll want to establish a reasonable bound on the work done in 1217 * the "borrowed" context. 1218 */ 1219 if (nwsp->nws_pendingbits != 0) { 1220 nwsp->nws_flags |= NWS_SCHEDULED; 1221 dosignal = 1; 1222 } else 1223 dosignal = 0; 1224 NWS_UNLOCK(nwsp); 1225 if (dosignal) 1226 NWS_SIGNAL(nwsp); 1227 error = 0; 1228 goto out_unpin; 1229 1230 queue_fallback: 1231 error = netisr_queue_internal(proto, m, cpuid); 1232 out_unpin: 1233 sched_unpin(); 1234 out_unlock: 1235 #ifdef NETISR_LOCKING 1236 NETISR_RUNLOCK(&tracker); 1237 #endif 1238 return (error); 1239 } 1240 1241 int 1242 netisr_dispatch(u_int proto, struct mbuf *m) 1243 { 1244 1245 return (netisr_dispatch_src(proto, 0, m)); 1246 } 1247 1248 #ifdef DEVICE_POLLING 1249 /* 1250 * Kernel polling borrows a netisr thread to run interface polling in; this 1251 * function allows kernel polling to request that the netisr thread be 1252 * scheduled even if no packets are pending for protocols. 
1253 */ 1254 void 1255 netisr_sched_poll(void) 1256 { 1257 struct netisr_workstream *nwsp; 1258 1259 nwsp = DPCPU_ID_PTR(nws_array[0], nws); 1260 NWS_SIGNAL(nwsp); 1261 } 1262 #endif 1263 1264 static void 1265 netisr_start_swi(u_int cpuid, struct pcpu *pc) 1266 { 1267 char swiname[12]; 1268 struct netisr_workstream *nwsp; 1269 int error; 1270 1271 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1272 1273 nwsp = DPCPU_ID_PTR(cpuid, nws); 1274 mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); 1275 nwsp->nws_cpu = cpuid; 1276 snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); 1277 error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, 1278 SWI_NET, INTR_TYPE_NET | INTR_MPSAFE, &nwsp->nws_swi_cookie); 1279 if (error) 1280 panic("%s: swi_add %d", __func__, error); 1281 pc->pc_netisr = nwsp->nws_intr_event; 1282 if (netisr_bindthreads) { 1283 error = intr_event_bind(nwsp->nws_intr_event, cpuid); 1284 if (error != 0) 1285 printf("%s: cpu %u: intr_event_bind: %d", __func__, 1286 cpuid, error); 1287 } 1288 NETISR_WLOCK(); 1289 nws_array[nws_count] = nwsp->nws_cpu; 1290 nws_count++; 1291 NETISR_WUNLOCK(); 1292 } 1293 1294 /* 1295 * Initialize the netisr subsystem. We rely on BSS and static initialization 1296 * of most fields in global data structures. 1297 * 1298 * Start a worker thread for the boot CPU so that we can support network 1299 * traffic immediately in case the network stack is used before additional 1300 * CPUs are started (for example, diskless boot). 1301 */ 1302 static void 1303 netisr_init(void *arg) 1304 { 1305 struct pcpu *pc; 1306 1307 NETISR_LOCK_INIT(); 1308 if (netisr_maxthreads == 0 || netisr_maxthreads < -1 ) 1309 netisr_maxthreads = 1; /* default behavior */ 1310 else if (netisr_maxthreads == -1) 1311 netisr_maxthreads = mp_ncpus; /* use max cpus */ 1312 if (netisr_maxthreads > mp_ncpus) { 1313 printf("netisr_init: forcing maxthreads from %d to %d\n", 1314 netisr_maxthreads, mp_ncpus); 1315 netisr_maxthreads = mp_ncpus; 1316 } 1317 if (netisr_defaultqlimit > netisr_maxqlimit) { 1318 printf("netisr_init: forcing defaultqlimit from %d to %d\n", 1319 netisr_defaultqlimit, netisr_maxqlimit); 1320 netisr_defaultqlimit = netisr_maxqlimit; 1321 } 1322 #ifdef DEVICE_POLLING 1323 /* 1324 * The device polling code is not yet aware of how to deal with 1325 * multiple netisr threads, so for the time being compiling in device 1326 * polling disables parallel netisr workers. 1327 */ 1328 if (netisr_maxthreads != 1 || netisr_bindthreads != 0) { 1329 printf("netisr_init: forcing maxthreads to 1 and " 1330 "bindthreads to 0 for device polling\n"); 1331 netisr_maxthreads = 1; 1332 netisr_bindthreads = 0; 1333 } 1334 #endif 1335 1336 #ifdef EARLY_AP_STARTUP 1337 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 1338 if (nws_count >= netisr_maxthreads) 1339 break; 1340 netisr_start_swi(pc->pc_cpuid, pc); 1341 } 1342 #else 1343 pc = get_pcpu(); 1344 netisr_start_swi(pc->pc_cpuid, pc); 1345 #endif 1346 } 1347 SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); 1348 1349 #ifndef EARLY_AP_STARTUP 1350 /* 1351 * Start worker threads for additional CPUs. No attempt to gracefully handle 1352 * work reassignment, we don't yet support dynamic reconfiguration. 1353 */ 1354 static void 1355 netisr_start(void *arg) 1356 { 1357 struct pcpu *pc; 1358 1359 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 1360 if (nws_count >= netisr_maxthreads) 1361 break; 1362 /* Worker will already be present for boot CPU. 
*/ 1363 if (pc->pc_netisr != NULL) 1364 continue; 1365 netisr_start_swi(pc->pc_cpuid, pc); 1366 } 1367 } 1368 SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); 1369 #endif 1370 1371 /* 1372 * Sysctl monitoring for netisr: query a list of registered protocols. 1373 */ 1374 static int 1375 sysctl_netisr_proto(SYSCTL_HANDLER_ARGS) 1376 { 1377 struct rm_priotracker tracker; 1378 struct sysctl_netisr_proto *snpp, *snp_array; 1379 struct netisr_proto *npp; 1380 u_int counter, proto; 1381 int error; 1382 1383 if (req->newptr != NULL) 1384 return (EINVAL); 1385 snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP, 1386 M_ZERO | M_WAITOK); 1387 counter = 0; 1388 NETISR_RLOCK(&tracker); 1389 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1390 npp = &netisr_proto[proto]; 1391 if (npp->np_name == NULL) 1392 continue; 1393 snpp = &snp_array[counter]; 1394 snpp->snp_version = sizeof(*snpp); 1395 strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN); 1396 snpp->snp_proto = proto; 1397 snpp->snp_qlimit = npp->np_qlimit; 1398 snpp->snp_policy = npp->np_policy; 1399 snpp->snp_dispatch = npp->np_dispatch; 1400 if (npp->np_m2flow != NULL) 1401 snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW; 1402 if (npp->np_m2cpuid != NULL) 1403 snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID; 1404 if (npp->np_drainedcpu != NULL) 1405 snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU; 1406 counter++; 1407 } 1408 NETISR_RUNLOCK(&tracker); 1409 KASSERT(counter <= NETISR_MAXPROT, 1410 ("sysctl_netisr_proto: counter too big (%d)", counter)); 1411 error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter); 1412 free(snp_array, M_TEMP); 1413 return (error); 1414 } 1415 1416 SYSCTL_PROC(_net_isr, OID_AUTO, proto, 1417 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto, 1418 "S,sysctl_netisr_proto", 1419 "Return list of protocols registered with netisr"); 1420 1421 /* 1422 * Sysctl monitoring for netisr: query a list of workstreams. 1423 */ 1424 static int 1425 sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS) 1426 { 1427 struct rm_priotracker tracker; 1428 struct sysctl_netisr_workstream *snwsp, *snws_array; 1429 struct netisr_workstream *nwsp; 1430 u_int counter, cpuid; 1431 int error; 1432 1433 if (req->newptr != NULL) 1434 return (EINVAL); 1435 snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP, 1436 M_ZERO | M_WAITOK); 1437 counter = 0; 1438 NETISR_RLOCK(&tracker); 1439 CPU_FOREACH(cpuid) { 1440 nwsp = DPCPU_ID_PTR(cpuid, nws); 1441 if (nwsp->nws_intr_event == NULL) 1442 continue; 1443 NWS_LOCK(nwsp); 1444 snwsp = &snws_array[counter]; 1445 snwsp->snws_version = sizeof(*snwsp); 1446 1447 /* 1448 * For now, we equate workstream IDs and CPU IDs in the 1449 * kernel, but expose them independently to userspace in case 1450 * that assumption changes in the future. 
1451 */ 1452 snwsp->snws_wsid = cpuid; 1453 snwsp->snws_cpu = cpuid; 1454 if (nwsp->nws_intr_event != NULL) 1455 snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR; 1456 NWS_UNLOCK(nwsp); 1457 counter++; 1458 } 1459 NETISR_RUNLOCK(&tracker); 1460 KASSERT(counter <= MAXCPU, 1461 ("sysctl_netisr_workstream: counter too big (%d)", counter)); 1462 error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter); 1463 free(snws_array, M_TEMP); 1464 return (error); 1465 } 1466 1467 SYSCTL_PROC(_net_isr, OID_AUTO, workstream, 1468 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream, 1469 "S,sysctl_netisr_workstream", 1470 "Return list of workstreams implemented by netisr"); 1471 1472 /* 1473 * Sysctl monitoring for netisr: query per-protocol data across all 1474 * workstreams. 1475 */ 1476 static int 1477 sysctl_netisr_work(SYSCTL_HANDLER_ARGS) 1478 { 1479 struct rm_priotracker tracker; 1480 struct sysctl_netisr_work *snwp, *snw_array; 1481 struct netisr_workstream *nwsp; 1482 struct netisr_proto *npp; 1483 struct netisr_work *nwp; 1484 u_int counter, cpuid, proto; 1485 int error; 1486 1487 if (req->newptr != NULL) 1488 return (EINVAL); 1489 snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT, 1490 M_TEMP, M_ZERO | M_WAITOK); 1491 counter = 0; 1492 NETISR_RLOCK(&tracker); 1493 CPU_FOREACH(cpuid) { 1494 nwsp = DPCPU_ID_PTR(cpuid, nws); 1495 if (nwsp->nws_intr_event == NULL) 1496 continue; 1497 NWS_LOCK(nwsp); 1498 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1499 npp = &netisr_proto[proto]; 1500 if (npp->np_name == NULL) 1501 continue; 1502 nwp = &nwsp->nws_work[proto]; 1503 snwp = &snw_array[counter]; 1504 snwp->snw_version = sizeof(*snwp); 1505 snwp->snw_wsid = cpuid; /* See comment above. */ 1506 snwp->snw_proto = proto; 1507 snwp->snw_len = nwp->nw_len; 1508 snwp->snw_watermark = nwp->nw_watermark; 1509 snwp->snw_dispatched = nwp->nw_dispatched; 1510 snwp->snw_hybrid_dispatched = 1511 nwp->nw_hybrid_dispatched; 1512 snwp->snw_qdrops = nwp->nw_qdrops; 1513 snwp->snw_queued = nwp->nw_queued; 1514 snwp->snw_handled = nwp->nw_handled; 1515 counter++; 1516 } 1517 NWS_UNLOCK(nwsp); 1518 } 1519 KASSERT(counter <= MAXCPU * NETISR_MAXPROT, 1520 ("sysctl_netisr_work: counter too big (%d)", counter)); 1521 NETISR_RUNLOCK(&tracker); 1522 error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter); 1523 free(snw_array, M_TEMP); 1524 return (error); 1525 } 1526 1527 SYSCTL_PROC(_net_isr, OID_AUTO, work, 1528 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work, 1529 "S,sysctl_netisr_work", 1530 "Return list of per-workstream, per-protocol work in netisr"); 1531 1532 #ifdef DDB 1533 DB_SHOW_COMMAND(netisr, db_show_netisr) 1534 { 1535 struct netisr_workstream *nwsp; 1536 struct netisr_work *nwp; 1537 int first, proto; 1538 u_int cpuid; 1539 1540 db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", 1541 "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); 1542 CPU_FOREACH(cpuid) { 1543 nwsp = DPCPU_ID_PTR(cpuid, nws); 1544 if (nwsp->nws_intr_event == NULL) 1545 continue; 1546 first = 1; 1547 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1548 if (netisr_proto[proto].np_handler == NULL) 1549 continue; 1550 nwp = &nwsp->nws_work[proto]; 1551 if (first) { 1552 db_printf("%3d ", cpuid); 1553 first = 0; 1554 } else 1555 db_printf("%3s ", ""); 1556 db_printf( 1557 "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", 1558 netisr_proto[proto].np_name, nwp->nw_len, 1559 nwp->nw_watermark, nwp->nw_qlimit, 1560 nwp->nw_dispatched, nwp->nw_hybrid_dispatched, 
1561 nwp->nw_qdrops, nwp->nw_queued); 1562 } 1563 } 1564 } 1565 #endif 1566