/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler would lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler would lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
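
/*
 * As a caller-side illustration (a sketch only; NETISR_FOO is a hypothetical
 * protocol constant, not one defined in netisr.h): code that can tolerate
 * direct dispatch hands packets to netisr_dispatch(), while code that must
 * avoid reentrance or recursive processing queues them instead:
 *
 *	error = netisr_dispatch(NETISR_FOO, m);	(may run the handler inline)
 *	error = netisr_queue(NETISR_FOO, m);	(always defers to the SWI)
 *
 * Both entry points are defined below; they return 0 or an errno, and on
 * failure the mbuf has already been freed by netisr.
 */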

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * the write lock is held while modifying the set of registered protocols so
 * that partially registered or unregistered protocols are never run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration are extremely rare at
 * runtime.  If they become more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
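
/*
 * For illustration (not itself a definition from this file): the global
 * policy is exposed below as the net.isr.dispatch tunable/sysctl, so an
 * administrator could select deferred dispatch at runtime with, e.g.,
 *
 *	sysctl net.isr.dispatch=deferred
 *
 * while an individual protocol opts out of the global setting by passing a
 * specific nh_dispatch value (for instance NETISR_DISPATCH_HYBRID) to
 * netisr_register() rather than NETISR_DISPATCH_DEFAULT.
 */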
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0.  This must be set at boot.  We will create at most one thread per
 * CPU.  By default we initialize this to 1, which assigns just one CPU
 * (cpu0) and therefore a single workstream.  If set to -1, netisr uses all
 * CPUs (mp_ncpus) and therefore that many workstreams, one workstream per
 * thread (CPU).
 */
static int	netisr_maxthreads = 1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");
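
/*
 * Example (illustrative only): the knobs above are boot-time tunables
 * (CTLFLAG_RDTUN), so they are typically set from the loader, e.g. in
 * /boot/loader.conf, to run one netisr thread per CPU and bind each thread
 * to its CPU:
 *
 *	net.isr.maxthreads="-1"
 *	net.isr.bindthreads="1"
 */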

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

#ifdef VIMAGE
/*
 * The netisr_enable array describes a per-VNET flag for registered
 * protocols on whether this netisr is active in this VNET or not.
 * netisr_register() will automatically enable the netisr for the
 * default VNET and all currently active instances.
 * netisr_unregister() will disable all active VNETs, including vnet0.
 * Individual network stack instances can be enabled/disabled by the
 * netisr_(un)register_vnet() functions.
 * With this we keep the one netisr_proto per protocol but add a
 * mechanism to stop netisr processing for vnet teardown.
 * Apart from that we expect a VNET to always be enabled.
 */
VNET_DEFINE_STATIC(u_int,	netisr_enable[NETISR_MAXPROT]);
#define	V_netisr_enable		VNET(netisr_enable)
#endif

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	return (nws_array[cpunumber % nws_count]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
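
/*
 * For example (an illustrative sketch for a hypothetical protocol, not code
 * used in this file): a protocol maintaining its own per-CPU state can stay
 * consistent with netisr's CPU assignment by iterating only over the CPUs
 * that actually back workstreams,
 *
 *	u_int i, cpuid;
 *
 *	for (i = 0; i < netisr_get_cpucount(); i++) {
 *		cpuid = netisr_get_cpuid(i);
 *		(set up per-CPU state for cpuid here)
 *	}
 *
 * and by using netisr_default_flow2cpu(flowid) when it needs the CPU that
 * netisr itself would pick for a given flow.
 */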

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	size_t len;
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	/*
	 * netisr is initialised very early during boot, when malloc isn't
	 * available yet, so we can't use sysctl_handle_string() to process
	 * any non-default value that was potentially set via the loader.
	 */
	if (req->newptr != NULL) {
		len = req->newlen - req->newidx;
		if (len >= NETISR_DISPATCH_POLICY_MAXSTR)
			return (EINVAL);
		error = SYSCTL_IN(req, tmp, len);
		if (error == 0) {
			tmp[len] = '\0';
			error = netisr_dispatch_policy_from_str(tmp,
			    &dispatch_policy);
			if (error == 0 &&
			    dispatch_policy == NETISR_DISPATCH_DEFAULT)
				error = EINVAL;
			if (error == 0)
				netisr_dispatch_policy = dispatch_policy;
		}
	} else {
		error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	}
	return (error);
}

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}

#ifdef VIMAGE
	/*
	 * Test that we are in vnet0 and have a curvnet set.
	 */
	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
	    __func__, curvnet, vnet0));
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_netisr_enable[proto] = 1;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
#endif
	NETISR_WUNLOCK();
}
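
/*
 * Registration example (a sketch only; "foo", foo_input() and NETISR_FOO are
 * hypothetical and not defined in the tree): a protocol typically registers
 * from its initialization routine with a statically initialized handler
 * description,
 *
 *	static void foo_input(struct mbuf *m);
 *
 *	static struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *		.nh_dispatch = NETISR_DISPATCH_DEFAULT,
 *	};
 *
 *	netisr_register(&foo_nh);
 *
 * Leaving nh_qlimit at 0 picks up net.isr.defaultqlimit, and
 * NETISR_POLICY_FLOW without an nh_m2flow callback relies on mbufs that
 * already carry a flow ID, falling back to the SOURCE policy otherwise (see
 * netisr_select_cpuid() below).
 */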

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}
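
/*
 * For example (illustrative, reusing the hypothetical foo_nh handler from
 * the registration sketch above): a protocol observing queue drops under
 * load might raise its per-CPU queue depth at runtime, subject to
 * net.isr.maxqlimit:
 *
 *	error = netisr_setqlimit(&foo_nh, 2048);
 */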

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

#ifdef VIMAGE
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_netisr_enable[proto] = 0;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
#endif

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

#ifdef VIMAGE
void
netisr_register_vnet(const struct netisr_handler *nhp)
{
	u_int proto;

	proto = nhp->nh_proto;

	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    nhp->nh_name));

	V_netisr_enable[proto] = 1;
	NETISR_WUNLOCK();
}

static void
netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
{
	struct epoch_tracker et;
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	struct mbuf *m, *mp, *n, *ne;
	struct ifnet *ifp;
	u_int i;

	KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
	NETISR_LOCK_ASSERT();

	CPU_FOREACH(i) {
		nwsp = DPCPU_ID_PTR(i, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		npwp = &nwsp->nws_work[proto];
		NWS_LOCK(nwsp);

		/*
		 * Rather than dissecting and removing mbufs from the middle
		 * of the chain, we build a new chain if the packet stays and
		 * update the head and tail pointers at the end.  All packets
		 * matching the given vnet are freed.
		 */
		m = npwp->nw_head;
		n = ne = NULL;
		NET_EPOCH_ENTER(et);
		while (m != NULL) {
			mp = m;
			m = m->m_nextpkt;
			mp->m_nextpkt = NULL;
			if ((ifp = ifnet_byindexgen(mp->m_pkthdr.rcvidx,
			    mp->m_pkthdr.rcvgen)) != NULL &&
			    ifp->if_vnet != vnet) {
				if (n == NULL) {
					n = ne = mp;
				} else {
					ne->m_nextpkt = mp;
					ne = mp;
				}
				continue;
			}
			/*
			 * This is a packet in the selected vnet, or it
			 * belongs to a destroyed interface.  Free it.
			 */
			npwp->nw_len--;
			m_freem(mp);
		}
		NET_EPOCH_EXIT(et);
		npwp->nw_head = n;
		npwp->nw_tail = ne;
		NWS_UNLOCK(nwsp);
	}
}

void
netisr_unregister_vnet(const struct netisr_handler *nhp)
{
	u_int proto;

	proto = nhp->nh_proto;

	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    nhp->nh_name));

	V_netisr_enable[proto] = 0;

	netisr_drain_proto_vnet(curvnet, proto);
	NETISR_WUNLOCK();
}
#endif

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE) {
			*cpuidp = netisr_get_cpuid(*cpuidp);
			return (m);
		}
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = netisr_get_cpuid(curcpu);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
		    npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		if (__predict_false(m_rcvif_restore(m) == NULL)) {
			m_freem(m);
			continue;
		}
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m_rcvif_serialize(m);
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef VIMAGE
	if (V_netisr_enable[proto] == 0) {
		m_freem(m);
		return (ENOPROTOOPT);
	}
#endif

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	NET_EPOCH_ASSERT();
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

#ifdef VIMAGE
	if (V_netisr_enable[proto] == 0) {
		m_freem(m);
		return (ENOPROTOOPT);
	}
#endif

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_TYPE_NET | INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	struct pcpu *pc;

	NETISR_LOCK_INIT();
	if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
		netisr_maxthreads = 1;		/* default behavior */
	else if (netisr_maxthreads == -1)
		netisr_maxthreads = mp_ncpus;	/* use max cpus */
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif

#ifdef EARLY_AP_STARTUP
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
#else
	pc = get_pcpu();
	netisr_start_swi(pc->pc_cpuid, pc);
#endif
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

#ifndef EARLY_AP_STARTUP
/*
 * Start worker threads for additional CPUs.  No attempt is made to
 * gracefully handle work reassignment, as we don't yet support dynamic
 * reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		/* Worker will already be present for boot CPU. */
		if (pc->pc_netisr != NULL)
			continue;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
#endif

/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_proto *snpp, *snp_array;
	struct netisr_proto *npp;
	u_int counter, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
		npp = &netisr_proto[proto];
		if (npp->np_name == NULL)
			continue;
		snpp = &snp_array[counter];
		snpp->snp_version = sizeof(*snpp);
		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
		snpp->snp_proto = proto;
		snpp->snp_qlimit = npp->np_qlimit;
		snpp->snp_policy = npp->np_policy;
		snpp->snp_dispatch = npp->np_dispatch;
		if (npp->np_m2flow != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
		if (npp->np_m2cpuid != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
		if (npp->np_drainedcpu != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= NETISR_MAXPROT,
	    ("sysctl_netisr_proto: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
	free(snp_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");

/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_workstream *snwsp, *snws_array;
	struct netisr_workstream *nwsp;
	u_int counter, cpuid;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		snwsp = &snws_array[counter];
		snwsp->snws_version = sizeof(*snwsp);

		/*
		 * For now, we equate workstream IDs and CPU IDs in the
		 * kernel, but expose them independently to userspace in case
		 * that assumption changes in the future.
		 */
		snwsp->snws_wsid = cpuid;
		snwsp->snws_cpu = cpuid;
		if (nwsp->nws_intr_event != NULL)
			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
		NWS_UNLOCK(nwsp);
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= MAXCPU,
	    ("sysctl_netisr_workstream: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
	free(snws_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_work *snwp, *snw_array;
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *nwp;
	u_int counter, cpuid, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
	    M_TEMP, M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			npp = &netisr_proto[proto];
			if (npp->np_name == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			snwp = &snw_array[counter];
			snwp->snw_version = sizeof(*snwp);
			snwp->snw_wsid = cpuid;		/* See comment above. */
			snwp->snw_proto = proto;
			snwp->snw_len = nwp->nw_len;
			snwp->snw_watermark = nwp->nw_watermark;
			snwp->snw_dispatched = nwp->nw_dispatched;
			snwp->snw_hybrid_dispatched =
			    nwp->nw_hybrid_dispatched;
			snwp->snw_qdrops = nwp->nw_qdrops;
			snwp->snw_queued = nwp->nw_queued;
			snwp->snw_handled = nwp->nw_handled;
			counter++;
		}
		NWS_UNLOCK(nwsp);
	}
	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
	    ("sysctl_netisr_work: counter too big (%d)", counter));
	NETISR_RUNLOCK(&tracker);
	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
	free(snw_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (netisr_proto[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    netisr_proto[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif
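
/*
 * Usage note (informational): the net.isr.proto, net.isr.workstream and
 * net.isr.work sysctls defined above are intended for userspace monitoring
 * tools (netstat's -Q output, for example, presents this kind of data), and
 * the DDB command above is invoked as "show netisr" from the debugger.
 */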