/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler would lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler would lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
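 *
 * As a rough usage illustration (a sketch, not a normative part of this
 * comment): a protocol describes itself with a struct netisr_handler and
 * registers it once at initialization time, after which its input paths
 * hand packets to netisr_dispatch().  The fragment below is modelled on
 * ip_input.c's registration of NETISR_IP; other fields (nh_qlimit,
 * nh_m2flow, nh_m2cpuid, nh_dispatch, nh_drainedcpu) are optional and
 * protocol specific:
 *
 *	static struct netisr_handler ip_nh = {
 *		.nh_name = "ip",
 *		.nh_handler = ip_input,
 *		.nh_proto = NETISR_IP,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *	};
 *
 *	netisr_register(&ip_nh);
 *	...
 *	netisr_dispatch(NETISR_IP, m);	(may direct dispatch or queue)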
 */

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * the lock is acquired exclusively (write lock) while modifying the set of
 * registered protocols, which prevents partially registered or unregistered
 * protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
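
/*
 * Illustrative note (a sketch, not normative): the global policy is exposed
 * as the net.isr.dispatch loader tunable and sysctl handled below, which
 * accepts "deferred", "hybrid" and "direct" ("default" is reserved for
 * per-protocol use and is rejected by the handler).  A protocol that wants a
 * particular behaviour regardless of the global setting requests it in its
 * handler structure; for a hypothetical "foo" protocol (NETISR_FOO and
 * foo_input are assumptions, not defined here), that might look like:
 *
 *	static struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_policy = NETISR_POLICY_SOURCE,
 *		.nh_dispatch = NETISR_DISPATCH_DEFERRED,
 *	};
 */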
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) used for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0.  This must be set at boot.  We create at most one thread per CPU,
 * and one workstream per thread (CPU).  By default this is 1, which assigns
 * just one CPU (CPU 0) and therefore a single workstream.  If set to -1,
 * netisr uses all CPUs (mp_ncpus) and therefore has that many workstreams.
 */
static int	netisr_maxthreads = 1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");
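
/*
 * Illustrative note: the thread and queue knobs above are boot-time tunables
 * (CTLFLAG_RDTUN), so they are normally set from /boot/loader.conf rather
 * than at runtime.  For example, to run one bound worker per CPU with a
 * deeper default queue, one might use:
 *
 *	net.isr.maxthreads=-1
 *	net.isr.bindthreads=1
 *	net.isr.defaultqlimit=1024
 */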
/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

#ifdef VIMAGE
/*
 * The netisr_enable array describes a per-VNET flag for registered
 * protocols on whether this netisr is active in this VNET or not.
 * netisr_register() will automatically enable the netisr for the
 * default VNET and all currently active instances.
 * netisr_unregister() will disable all active VNETs, including vnet0.
 * Individual network stack instances can be enabled/disabled by the
 * netisr_(un)register_vnet() functions.
 * With this we keep the one netisr_proto per protocol but add a
 * mechanism to stop netisr processing for vnet teardown.
 * Apart from that we expect a VNET to always be enabled.
 */
VNET_DEFINE_STATIC(u_int,	netisr_enable[NETISR_MAXPROT]);
#define	V_netisr_enable		VNET(netisr_enable)
#endif

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	return (nws_array[cpunumber % nws_count]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
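
/*
 * Illustrative only: a protocol registering with NETISR_POLICY_CPU supplies
 * an nh_m2cpuid callback that nominates a CPU slot for each packet; the
 * CPU-policy path in netisr_select_cpuid() then maps that value onto a real
 * CPU via netisr_get_cpuid() (or falls back if NETISR_CPUID_NONE is
 * returned).  A hypothetical callback for a made-up "foo" protocol might
 * look like the sketch below; foo_hash() and the "foo" protocol are
 * assumptions, not part of this file.
 */
#if 0
static struct mbuf *
foo_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuidp)
{

	/* Spread flows across however many netisr workers exist. */
	*cpuidp = foo_hash(m) % netisr_get_cpucount();
	return (m);
}
#endif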
/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0)
			netisr_dispatch_policy = dispatch_policy;
	}
	return (error);
}
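
/*
 * Illustrative note: since the handler above rejects "default", the global
 * policy can only be switched between the three concrete policies, either
 * via loader.conf (the OID is CTLFLAG_RWTUN) or at runtime, e.g.:
 *
 *	# sysctl net.isr.dispatch=deferred
 *	net.isr.dispatch: direct -> deferred
 */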
/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}

#ifdef VIMAGE
	/*
	 * Test that we are in vnet0 and have a curvnet set.
	 */
	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
	    __func__, curvnet, vnet0));
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_netisr_enable[proto] = 1;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
#endif
	NETISR_WUNLOCK();
}
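
/*
 * Illustrative only: a virtualized protocol typically pairs a single
 * netisr_register() call at module initialization with per-VNET enable and
 * disable calls from VNET_SYSINIT/VNET_SYSUNINIT handlers, so that packets
 * stop flowing into a vnet that is being torn down.  A rough sketch for the
 * hypothetical "foo" protocol used in earlier examples (foo_nh is an
 * assumption, not defined here) might be:
 */
#if 0
static void
foo_vnet_init(void *arg __unused)
{

	/* Enable netisr processing of "foo" packets for this vnet. */
	netisr_register_vnet(&foo_nh);
}
VNET_SYSINIT(foo_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY,
    foo_vnet_init, NULL);

static void
foo_vnet_uninit(void *arg __unused)
{

	/* Stop netisr processing and drain queued "foo" packets. */
	netisr_unregister_vnet(&foo_nh);
}
VNET_SYSUNINIT(foo_vnet_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY,
    foo_vnet_uninit, NULL);
#endif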
/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
600 */ 601 while ((m = npwp->nw_head) != NULL) { 602 npwp->nw_head = m->m_nextpkt; 603 m->m_nextpkt = NULL; 604 if (npwp->nw_head == NULL) 605 npwp->nw_tail = NULL; 606 npwp->nw_len--; 607 m_freem(m); 608 } 609 KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__)); 610 KASSERT(npwp->nw_len == 0, ("%s: len", __func__)); 611 } 612 613 /* 614 * Remove the registration of a network protocol, which requires clearing 615 * per-protocol fields across all workstreams, including freeing all mbufs in 616 * the queues at time of unregister. All work in netisr is briefly suspended 617 * while this takes place. 618 */ 619 void 620 netisr_unregister(const struct netisr_handler *nhp) 621 { 622 VNET_ITERATOR_DECL(vnet_iter); 623 struct netisr_work *npwp; 624 #ifdef INVARIANTS 625 const char *name; 626 #endif 627 u_int i, proto; 628 629 proto = nhp->nh_proto; 630 #ifdef INVARIANTS 631 name = nhp->nh_name; 632 #endif 633 KASSERT(proto < NETISR_MAXPROT, 634 ("%s(%u): protocol too big for %s", __func__, proto, name)); 635 636 NETISR_WLOCK(); 637 KASSERT(netisr_proto[proto].np_handler != NULL, 638 ("%s(%u): protocol not registered for %s", __func__, proto, 639 name)); 640 641 #ifdef VIMAGE 642 VNET_LIST_RLOCK_NOSLEEP(); 643 VNET_FOREACH(vnet_iter) { 644 CURVNET_SET(vnet_iter); 645 V_netisr_enable[proto] = 0; 646 CURVNET_RESTORE(); 647 } 648 VNET_LIST_RUNLOCK_NOSLEEP(); 649 #endif 650 651 netisr_proto[proto].np_name = NULL; 652 netisr_proto[proto].np_handler = NULL; 653 netisr_proto[proto].np_m2flow = NULL; 654 netisr_proto[proto].np_m2cpuid = NULL; 655 netisr_proto[proto].np_qlimit = 0; 656 netisr_proto[proto].np_policy = 0; 657 CPU_FOREACH(i) { 658 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 659 netisr_drain_proto(npwp); 660 bzero(npwp, sizeof(*npwp)); 661 } 662 NETISR_WUNLOCK(); 663 } 664 665 #ifdef VIMAGE 666 void 667 netisr_register_vnet(const struct netisr_handler *nhp) 668 { 669 u_int proto; 670 671 proto = nhp->nh_proto; 672 673 KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); 674 KASSERT(proto < NETISR_MAXPROT, 675 ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); 676 NETISR_WLOCK(); 677 KASSERT(netisr_proto[proto].np_handler != NULL, 678 ("%s(%u): protocol not registered for %s", __func__, proto, 679 nhp->nh_name)); 680 681 V_netisr_enable[proto] = 1; 682 NETISR_WUNLOCK(); 683 } 684 685 static void 686 netisr_drain_proto_vnet(struct vnet *vnet, u_int proto) 687 { 688 struct netisr_workstream *nwsp; 689 struct netisr_work *npwp; 690 struct mbuf *m, *mp, *n, *ne; 691 u_int i; 692 693 KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__)); 694 NETISR_LOCK_ASSERT(); 695 696 CPU_FOREACH(i) { 697 nwsp = DPCPU_ID_PTR(i, nws); 698 if (nwsp->nws_intr_event == NULL) 699 continue; 700 npwp = &nwsp->nws_work[proto]; 701 NWS_LOCK(nwsp); 702 703 /* 704 * Rather than dissecting and removing mbufs from the middle 705 * of the chain, we build a new chain if the packet stays and 706 * update the head and tail pointers at the end. All packets 707 * matching the given vnet are freed. 708 */ 709 m = npwp->nw_head; 710 n = ne = NULL; 711 while (m != NULL) { 712 mp = m; 713 m = m->m_nextpkt; 714 mp->m_nextpkt = NULL; 715 if (mp->m_pkthdr.rcvif->if_vnet != vnet) { 716 if (n == NULL) { 717 n = ne = mp; 718 } else { 719 ne->m_nextpkt = mp; 720 ne = mp; 721 } 722 continue; 723 } 724 /* This is a packet in the selected vnet. Free it. 
*/ 725 npwp->nw_len--; 726 m_freem(mp); 727 } 728 npwp->nw_head = n; 729 npwp->nw_tail = ne; 730 NWS_UNLOCK(nwsp); 731 } 732 } 733 734 void 735 netisr_unregister_vnet(const struct netisr_handler *nhp) 736 { 737 u_int proto; 738 739 proto = nhp->nh_proto; 740 741 KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); 742 KASSERT(proto < NETISR_MAXPROT, 743 ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); 744 NETISR_WLOCK(); 745 KASSERT(netisr_proto[proto].np_handler != NULL, 746 ("%s(%u): protocol not registered for %s", __func__, proto, 747 nhp->nh_name)); 748 749 V_netisr_enable[proto] = 0; 750 751 netisr_drain_proto_vnet(curvnet, proto); 752 NETISR_WUNLOCK(); 753 } 754 #endif 755 756 /* 757 * Compose the global and per-protocol policies on dispatch, and return the 758 * dispatch policy to use. 759 */ 760 static u_int 761 netisr_get_dispatch(struct netisr_proto *npp) 762 { 763 764 /* 765 * Protocol-specific configuration overrides the global default. 766 */ 767 if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT) 768 return (npp->np_dispatch); 769 return (netisr_dispatch_policy); 770 } 771 772 /* 773 * Look up the workstream given a packet and source identifier. Do this by 774 * checking the protocol's policy, and optionally call out to the protocol 775 * for assistance if required. 776 */ 777 static struct mbuf * 778 netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy, 779 uintptr_t source, struct mbuf *m, u_int *cpuidp) 780 { 781 struct ifnet *ifp; 782 u_int policy; 783 784 NETISR_LOCK_ASSERT(); 785 786 /* 787 * In the event we have only one worker, shortcut and deliver to it 788 * without further ado. 789 */ 790 if (nws_count == 1) { 791 *cpuidp = nws_array[0]; 792 return (m); 793 } 794 795 /* 796 * What happens next depends on the policy selected by the protocol. 797 * If we want to support per-interface policies, we should do that 798 * here first. 799 */ 800 policy = npp->np_policy; 801 if (policy == NETISR_POLICY_CPU) { 802 m = npp->np_m2cpuid(m, source, cpuidp); 803 if (m == NULL) 804 return (NULL); 805 806 /* 807 * It's possible for a protocol not to have a good idea about 808 * where to process a packet, in which case we fall back on 809 * the netisr code to decide. In the hybrid case, return the 810 * current CPU ID, which will force an immediate direct 811 * dispatch. In the queued case, fall back on the SOURCE 812 * policy. 813 */ 814 if (*cpuidp != NETISR_CPUID_NONE) { 815 *cpuidp = netisr_get_cpuid(*cpuidp); 816 return (m); 817 } 818 if (dispatch_policy == NETISR_DISPATCH_HYBRID) { 819 *cpuidp = netisr_get_cpuid(curcpu); 820 return (m); 821 } 822 policy = NETISR_POLICY_SOURCE; 823 } 824 825 if (policy == NETISR_POLICY_FLOW) { 826 if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE && 827 npp->np_m2flow != NULL) { 828 m = npp->np_m2flow(m, source); 829 if (m == NULL) 830 return (NULL); 831 } 832 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 833 *cpuidp = 834 netisr_default_flow2cpu(m->m_pkthdr.flowid); 835 return (m); 836 } 837 policy = NETISR_POLICY_SOURCE; 838 } 839 840 KASSERT(policy == NETISR_POLICY_SOURCE, 841 ("%s: invalid policy %u for %s", __func__, npp->np_policy, 842 npp->np_name)); 843 844 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 845 ifp = m->m_pkthdr.rcvif; 846 if (ifp != NULL) 847 *cpuidp = nws_array[(ifp->if_index + source) % nws_count]; 848 else 849 *cpuidp = nws_array[source % nws_count]; 850 return (m); 851 } 852 853 /* 854 * Process packets associated with a workstream and protocol. 
For reasons of 855 * fairness, we process up to one complete netisr queue at a time, moving the 856 * queue to a stack-local queue for processing, but do not loop refreshing 857 * from the global queue. The caller is responsible for deciding whether to 858 * loop, and for setting the NWS_RUNNING flag. The passed workstream will be 859 * locked on entry and relocked before return, but will be released while 860 * processing. The number of packets processed is returned. 861 */ 862 static u_int 863 netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) 864 { 865 struct netisr_work local_npw, *npwp; 866 u_int handled; 867 struct mbuf *m; 868 869 NETISR_LOCK_ASSERT(); 870 NWS_LOCK_ASSERT(nwsp); 871 872 KASSERT(nwsp->nws_flags & NWS_RUNNING, 873 ("%s(%u): not running", __func__, proto)); 874 KASSERT(proto >= 0 && proto < NETISR_MAXPROT, 875 ("%s(%u): invalid proto\n", __func__, proto)); 876 877 npwp = &nwsp->nws_work[proto]; 878 if (npwp->nw_len == 0) 879 return (0); 880 881 /* 882 * Move the global work queue to a thread-local work queue. 883 * 884 * Notice that this means the effective maximum length of the queue 885 * is actually twice that of the maximum queue length specified in 886 * the protocol registration call. 887 */ 888 handled = npwp->nw_len; 889 local_npw = *npwp; 890 npwp->nw_head = NULL; 891 npwp->nw_tail = NULL; 892 npwp->nw_len = 0; 893 nwsp->nws_pendingbits &= ~(1 << proto); 894 NWS_UNLOCK(nwsp); 895 while ((m = local_npw.nw_head) != NULL) { 896 local_npw.nw_head = m->m_nextpkt; 897 m->m_nextpkt = NULL; 898 if (local_npw.nw_head == NULL) 899 local_npw.nw_tail = NULL; 900 local_npw.nw_len--; 901 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 902 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 903 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 904 netisr_proto[proto].np_handler(m); 905 CURVNET_RESTORE(); 906 } 907 KASSERT(local_npw.nw_len == 0, 908 ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); 909 if (netisr_proto[proto].np_drainedcpu) 910 netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu); 911 NWS_LOCK(nwsp); 912 npwp->nw_handled += handled; 913 return (handled); 914 } 915 916 /* 917 * SWI handler for netisr -- processes packets in a set of workstreams that 918 * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already 919 * being direct dispatched, go back to sleep and wait for the dispatching 920 * thread to wake us up again. 
921 */ 922 static void 923 swi_net(void *arg) 924 { 925 #ifdef NETISR_LOCKING 926 struct rm_priotracker tracker; 927 #endif 928 struct netisr_workstream *nwsp; 929 u_int bits, prot; 930 931 nwsp = arg; 932 933 #ifdef DEVICE_POLLING 934 KASSERT(nws_count == 1, 935 ("%s: device_polling but nws_count != 1", __func__)); 936 netisr_poll(); 937 #endif 938 #ifdef NETISR_LOCKING 939 NETISR_RLOCK(&tracker); 940 #endif 941 NWS_LOCK(nwsp); 942 KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running")); 943 if (nwsp->nws_flags & NWS_DISPATCHING) 944 goto out; 945 nwsp->nws_flags |= NWS_RUNNING; 946 nwsp->nws_flags &= ~NWS_SCHEDULED; 947 while ((bits = nwsp->nws_pendingbits) != 0) { 948 while ((prot = ffs(bits)) != 0) { 949 prot--; 950 bits &= ~(1 << prot); 951 (void)netisr_process_workstream_proto(nwsp, prot); 952 } 953 } 954 nwsp->nws_flags &= ~NWS_RUNNING; 955 out: 956 NWS_UNLOCK(nwsp); 957 #ifdef NETISR_LOCKING 958 NETISR_RUNLOCK(&tracker); 959 #endif 960 #ifdef DEVICE_POLLING 961 netisr_pollmore(); 962 #endif 963 } 964 965 static int 966 netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto, 967 struct netisr_work *npwp, struct mbuf *m, int *dosignalp) 968 { 969 970 NWS_LOCK_ASSERT(nwsp); 971 972 *dosignalp = 0; 973 if (npwp->nw_len < npwp->nw_qlimit) { 974 m->m_nextpkt = NULL; 975 if (npwp->nw_head == NULL) { 976 npwp->nw_head = m; 977 npwp->nw_tail = m; 978 } else { 979 npwp->nw_tail->m_nextpkt = m; 980 npwp->nw_tail = m; 981 } 982 npwp->nw_len++; 983 if (npwp->nw_len > npwp->nw_watermark) 984 npwp->nw_watermark = npwp->nw_len; 985 986 /* 987 * We must set the bit regardless of NWS_RUNNING, so that 988 * swi_net() keeps calling netisr_process_workstream_proto(). 989 */ 990 nwsp->nws_pendingbits |= (1 << proto); 991 if (!(nwsp->nws_flags & 992 (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) { 993 nwsp->nws_flags |= NWS_SCHEDULED; 994 *dosignalp = 1; /* Defer until unlocked. 
*/ 995 } 996 npwp->nw_queued++; 997 return (0); 998 } else { 999 m_freem(m); 1000 npwp->nw_qdrops++; 1001 return (ENOBUFS); 1002 } 1003 } 1004 1005 static int 1006 netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid) 1007 { 1008 struct netisr_workstream *nwsp; 1009 struct netisr_work *npwp; 1010 int dosignal, error; 1011 1012 #ifdef NETISR_LOCKING 1013 NETISR_LOCK_ASSERT(); 1014 #endif 1015 KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__, 1016 cpuid, mp_maxid)); 1017 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1018 1019 dosignal = 0; 1020 error = 0; 1021 nwsp = DPCPU_ID_PTR(cpuid, nws); 1022 npwp = &nwsp->nws_work[proto]; 1023 NWS_LOCK(nwsp); 1024 error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal); 1025 NWS_UNLOCK(nwsp); 1026 if (dosignal) 1027 NWS_SIGNAL(nwsp); 1028 return (error); 1029 } 1030 1031 int 1032 netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) 1033 { 1034 #ifdef NETISR_LOCKING 1035 struct rm_priotracker tracker; 1036 #endif 1037 u_int cpuid; 1038 int error; 1039 1040 KASSERT(proto < NETISR_MAXPROT, 1041 ("%s: invalid proto %u", __func__, proto)); 1042 1043 #ifdef NETISR_LOCKING 1044 NETISR_RLOCK(&tracker); 1045 #endif 1046 KASSERT(netisr_proto[proto].np_handler != NULL, 1047 ("%s: invalid proto %u", __func__, proto)); 1048 1049 #ifdef VIMAGE 1050 if (V_netisr_enable[proto] == 0) { 1051 m_freem(m); 1052 return (ENOPROTOOPT); 1053 } 1054 #endif 1055 1056 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED, 1057 source, m, &cpuid); 1058 if (m != NULL) { 1059 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, 1060 cpuid)); 1061 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 1062 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 1063 error = netisr_queue_internal(proto, m, cpuid); 1064 } else 1065 error = ENOBUFS; 1066 #ifdef NETISR_LOCKING 1067 NETISR_RUNLOCK(&tracker); 1068 #endif 1069 return (error); 1070 } 1071 1072 int 1073 netisr_queue(u_int proto, struct mbuf *m) 1074 { 1075 1076 return (netisr_queue_src(proto, 0, m)); 1077 } 1078 1079 /* 1080 * Dispatch a packet for netisr processing; direct dispatch is permitted by 1081 * calling context. 1082 */ 1083 int 1084 netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) 1085 { 1086 #ifdef NETISR_LOCKING 1087 struct rm_priotracker tracker; 1088 #endif 1089 struct netisr_workstream *nwsp; 1090 struct netisr_proto *npp; 1091 struct netisr_work *npwp; 1092 int dosignal, error; 1093 u_int cpuid, dispatch_policy; 1094 1095 NET_EPOCH_ASSERT(); 1096 KASSERT(proto < NETISR_MAXPROT, 1097 ("%s: invalid proto %u", __func__, proto)); 1098 #ifdef NETISR_LOCKING 1099 NETISR_RLOCK(&tracker); 1100 #endif 1101 npp = &netisr_proto[proto]; 1102 KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, 1103 proto)); 1104 1105 #ifdef VIMAGE 1106 if (V_netisr_enable[proto] == 0) { 1107 m_freem(m); 1108 return (ENOPROTOOPT); 1109 } 1110 #endif 1111 1112 dispatch_policy = netisr_get_dispatch(npp); 1113 if (dispatch_policy == NETISR_DISPATCH_DEFERRED) 1114 return (netisr_queue_src(proto, source, m)); 1115 1116 /* 1117 * If direct dispatch is forced, then unconditionally dispatch 1118 * without a formal CPU selection. Borrow the current CPU's stats, 1119 * even if there's no worker on it. In this case we don't update 1120 * nws_flags because all netisr processing will be source ordered due 1121 * to always being forced to directly dispatch. 
1122 */ 1123 if (dispatch_policy == NETISR_DISPATCH_DIRECT) { 1124 nwsp = DPCPU_PTR(nws); 1125 npwp = &nwsp->nws_work[proto]; 1126 npwp->nw_dispatched++; 1127 npwp->nw_handled++; 1128 netisr_proto[proto].np_handler(m); 1129 error = 0; 1130 goto out_unlock; 1131 } 1132 1133 KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID, 1134 ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy)); 1135 1136 /* 1137 * Otherwise, we execute in a hybrid mode where we will try to direct 1138 * dispatch if we're on the right CPU and the netisr worker isn't 1139 * already running. 1140 */ 1141 sched_pin(); 1142 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID, 1143 source, m, &cpuid); 1144 if (m == NULL) { 1145 error = ENOBUFS; 1146 goto out_unpin; 1147 } 1148 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1149 if (cpuid != curcpu) 1150 goto queue_fallback; 1151 nwsp = DPCPU_PTR(nws); 1152 npwp = &nwsp->nws_work[proto]; 1153 1154 /*- 1155 * We are willing to direct dispatch only if three conditions hold: 1156 * 1157 * (1) The netisr worker isn't already running, 1158 * (2) Another thread isn't already directly dispatching, and 1159 * (3) The netisr hasn't already been woken up. 1160 */ 1161 NWS_LOCK(nwsp); 1162 if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) { 1163 error = netisr_queue_workstream(nwsp, proto, npwp, m, 1164 &dosignal); 1165 NWS_UNLOCK(nwsp); 1166 if (dosignal) 1167 NWS_SIGNAL(nwsp); 1168 goto out_unpin; 1169 } 1170 1171 /* 1172 * The current thread is now effectively the netisr worker, so set 1173 * the dispatching flag to prevent concurrent processing of the 1174 * stream from another thread (even the netisr worker), which could 1175 * otherwise lead to effective misordering of the stream. 1176 */ 1177 nwsp->nws_flags |= NWS_DISPATCHING; 1178 NWS_UNLOCK(nwsp); 1179 netisr_proto[proto].np_handler(m); 1180 NWS_LOCK(nwsp); 1181 nwsp->nws_flags &= ~NWS_DISPATCHING; 1182 npwp->nw_handled++; 1183 npwp->nw_hybrid_dispatched++; 1184 1185 /* 1186 * If other work was enqueued by another thread while we were direct 1187 * dispatching, we need to signal the netisr worker to do that work. 1188 * In the future, we might want to do some of that work in the 1189 * current thread, rather than trigger further context switches. If 1190 * so, we'll want to establish a reasonable bound on the work done in 1191 * the "borrowed" context. 1192 */ 1193 if (nwsp->nws_pendingbits != 0) { 1194 nwsp->nws_flags |= NWS_SCHEDULED; 1195 dosignal = 1; 1196 } else 1197 dosignal = 0; 1198 NWS_UNLOCK(nwsp); 1199 if (dosignal) 1200 NWS_SIGNAL(nwsp); 1201 error = 0; 1202 goto out_unpin; 1203 1204 queue_fallback: 1205 error = netisr_queue_internal(proto, m, cpuid); 1206 out_unpin: 1207 sched_unpin(); 1208 out_unlock: 1209 #ifdef NETISR_LOCKING 1210 NETISR_RUNLOCK(&tracker); 1211 #endif 1212 return (error); 1213 } 1214 1215 int 1216 netisr_dispatch(u_int proto, struct mbuf *m) 1217 { 1218 1219 return (netisr_dispatch_src(proto, 0, m)); 1220 } 1221 1222 #ifdef DEVICE_POLLING 1223 /* 1224 * Kernel polling borrows a netisr thread to run interface polling in; this 1225 * function allows kernel polling to request that the netisr thread be 1226 * scheduled even if no packets are pending for protocols. 
1227 */ 1228 void 1229 netisr_sched_poll(void) 1230 { 1231 struct netisr_workstream *nwsp; 1232 1233 nwsp = DPCPU_ID_PTR(nws_array[0], nws); 1234 NWS_SIGNAL(nwsp); 1235 } 1236 #endif 1237 1238 static void 1239 netisr_start_swi(u_int cpuid, struct pcpu *pc) 1240 { 1241 char swiname[12]; 1242 struct netisr_workstream *nwsp; 1243 int error; 1244 1245 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1246 1247 nwsp = DPCPU_ID_PTR(cpuid, nws); 1248 mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); 1249 nwsp->nws_cpu = cpuid; 1250 snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); 1251 error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, 1252 SWI_NET, INTR_TYPE_NET | INTR_MPSAFE, &nwsp->nws_swi_cookie); 1253 if (error) 1254 panic("%s: swi_add %d", __func__, error); 1255 pc->pc_netisr = nwsp->nws_intr_event; 1256 if (netisr_bindthreads) { 1257 error = intr_event_bind(nwsp->nws_intr_event, cpuid); 1258 if (error != 0) 1259 printf("%s: cpu %u: intr_event_bind: %d", __func__, 1260 cpuid, error); 1261 } 1262 NETISR_WLOCK(); 1263 nws_array[nws_count] = nwsp->nws_cpu; 1264 nws_count++; 1265 NETISR_WUNLOCK(); 1266 } 1267 1268 /* 1269 * Initialize the netisr subsystem. We rely on BSS and static initialization 1270 * of most fields in global data structures. 1271 * 1272 * Start a worker thread for the boot CPU so that we can support network 1273 * traffic immediately in case the network stack is used before additional 1274 * CPUs are started (for example, diskless boot). 1275 */ 1276 static void 1277 netisr_init(void *arg) 1278 { 1279 struct pcpu *pc; 1280 1281 NETISR_LOCK_INIT(); 1282 if (netisr_maxthreads == 0 || netisr_maxthreads < -1 ) 1283 netisr_maxthreads = 1; /* default behavior */ 1284 else if (netisr_maxthreads == -1) 1285 netisr_maxthreads = mp_ncpus; /* use max cpus */ 1286 if (netisr_maxthreads > mp_ncpus) { 1287 printf("netisr_init: forcing maxthreads from %d to %d\n", 1288 netisr_maxthreads, mp_ncpus); 1289 netisr_maxthreads = mp_ncpus; 1290 } 1291 if (netisr_defaultqlimit > netisr_maxqlimit) { 1292 printf("netisr_init: forcing defaultqlimit from %d to %d\n", 1293 netisr_defaultqlimit, netisr_maxqlimit); 1294 netisr_defaultqlimit = netisr_maxqlimit; 1295 } 1296 #ifdef DEVICE_POLLING 1297 /* 1298 * The device polling code is not yet aware of how to deal with 1299 * multiple netisr threads, so for the time being compiling in device 1300 * polling disables parallel netisr workers. 1301 */ 1302 if (netisr_maxthreads != 1 || netisr_bindthreads != 0) { 1303 printf("netisr_init: forcing maxthreads to 1 and " 1304 "bindthreads to 0 for device polling\n"); 1305 netisr_maxthreads = 1; 1306 netisr_bindthreads = 0; 1307 } 1308 #endif 1309 1310 #ifdef EARLY_AP_STARTUP 1311 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 1312 if (nws_count >= netisr_maxthreads) 1313 break; 1314 netisr_start_swi(pc->pc_cpuid, pc); 1315 } 1316 #else 1317 pc = get_pcpu(); 1318 netisr_start_swi(pc->pc_cpuid, pc); 1319 #endif 1320 } 1321 SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); 1322 1323 #ifndef EARLY_AP_STARTUP 1324 /* 1325 * Start worker threads for additional CPUs. No attempt to gracefully handle 1326 * work reassignment, we don't yet support dynamic reconfiguration. 1327 */ 1328 static void 1329 netisr_start(void *arg) 1330 { 1331 struct pcpu *pc; 1332 1333 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 1334 if (nws_count >= netisr_maxthreads) 1335 break; 1336 /* Worker will already be present for boot CPU. 
*/ 1337 if (pc->pc_netisr != NULL) 1338 continue; 1339 netisr_start_swi(pc->pc_cpuid, pc); 1340 } 1341 } 1342 SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); 1343 #endif 1344 1345 /* 1346 * Sysctl monitoring for netisr: query a list of registered protocols. 1347 */ 1348 static int 1349 sysctl_netisr_proto(SYSCTL_HANDLER_ARGS) 1350 { 1351 struct rm_priotracker tracker; 1352 struct sysctl_netisr_proto *snpp, *snp_array; 1353 struct netisr_proto *npp; 1354 u_int counter, proto; 1355 int error; 1356 1357 if (req->newptr != NULL) 1358 return (EINVAL); 1359 snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP, 1360 M_ZERO | M_WAITOK); 1361 counter = 0; 1362 NETISR_RLOCK(&tracker); 1363 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1364 npp = &netisr_proto[proto]; 1365 if (npp->np_name == NULL) 1366 continue; 1367 snpp = &snp_array[counter]; 1368 snpp->snp_version = sizeof(*snpp); 1369 strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN); 1370 snpp->snp_proto = proto; 1371 snpp->snp_qlimit = npp->np_qlimit; 1372 snpp->snp_policy = npp->np_policy; 1373 snpp->snp_dispatch = npp->np_dispatch; 1374 if (npp->np_m2flow != NULL) 1375 snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW; 1376 if (npp->np_m2cpuid != NULL) 1377 snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID; 1378 if (npp->np_drainedcpu != NULL) 1379 snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU; 1380 counter++; 1381 } 1382 NETISR_RUNLOCK(&tracker); 1383 KASSERT(counter <= NETISR_MAXPROT, 1384 ("sysctl_netisr_proto: counter too big (%d)", counter)); 1385 error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter); 1386 free(snp_array, M_TEMP); 1387 return (error); 1388 } 1389 1390 SYSCTL_PROC(_net_isr, OID_AUTO, proto, 1391 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto, 1392 "S,sysctl_netisr_proto", 1393 "Return list of protocols registered with netisr"); 1394 1395 /* 1396 * Sysctl monitoring for netisr: query a list of workstreams. 1397 */ 1398 static int 1399 sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS) 1400 { 1401 struct rm_priotracker tracker; 1402 struct sysctl_netisr_workstream *snwsp, *snws_array; 1403 struct netisr_workstream *nwsp; 1404 u_int counter, cpuid; 1405 int error; 1406 1407 if (req->newptr != NULL) 1408 return (EINVAL); 1409 snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP, 1410 M_ZERO | M_WAITOK); 1411 counter = 0; 1412 NETISR_RLOCK(&tracker); 1413 CPU_FOREACH(cpuid) { 1414 nwsp = DPCPU_ID_PTR(cpuid, nws); 1415 if (nwsp->nws_intr_event == NULL) 1416 continue; 1417 NWS_LOCK(nwsp); 1418 snwsp = &snws_array[counter]; 1419 snwsp->snws_version = sizeof(*snwsp); 1420 1421 /* 1422 * For now, we equate workstream IDs and CPU IDs in the 1423 * kernel, but expose them independently to userspace in case 1424 * that assumption changes in the future. 
1425 */ 1426 snwsp->snws_wsid = cpuid; 1427 snwsp->snws_cpu = cpuid; 1428 if (nwsp->nws_intr_event != NULL) 1429 snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR; 1430 NWS_UNLOCK(nwsp); 1431 counter++; 1432 } 1433 NETISR_RUNLOCK(&tracker); 1434 KASSERT(counter <= MAXCPU, 1435 ("sysctl_netisr_workstream: counter too big (%d)", counter)); 1436 error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter); 1437 free(snws_array, M_TEMP); 1438 return (error); 1439 } 1440 1441 SYSCTL_PROC(_net_isr, OID_AUTO, workstream, 1442 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream, 1443 "S,sysctl_netisr_workstream", 1444 "Return list of workstreams implemented by netisr"); 1445 1446 /* 1447 * Sysctl monitoring for netisr: query per-protocol data across all 1448 * workstreams. 1449 */ 1450 static int 1451 sysctl_netisr_work(SYSCTL_HANDLER_ARGS) 1452 { 1453 struct rm_priotracker tracker; 1454 struct sysctl_netisr_work *snwp, *snw_array; 1455 struct netisr_workstream *nwsp; 1456 struct netisr_proto *npp; 1457 struct netisr_work *nwp; 1458 u_int counter, cpuid, proto; 1459 int error; 1460 1461 if (req->newptr != NULL) 1462 return (EINVAL); 1463 snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT, 1464 M_TEMP, M_ZERO | M_WAITOK); 1465 counter = 0; 1466 NETISR_RLOCK(&tracker); 1467 CPU_FOREACH(cpuid) { 1468 nwsp = DPCPU_ID_PTR(cpuid, nws); 1469 if (nwsp->nws_intr_event == NULL) 1470 continue; 1471 NWS_LOCK(nwsp); 1472 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1473 npp = &netisr_proto[proto]; 1474 if (npp->np_name == NULL) 1475 continue; 1476 nwp = &nwsp->nws_work[proto]; 1477 snwp = &snw_array[counter]; 1478 snwp->snw_version = sizeof(*snwp); 1479 snwp->snw_wsid = cpuid; /* See comment above. */ 1480 snwp->snw_proto = proto; 1481 snwp->snw_len = nwp->nw_len; 1482 snwp->snw_watermark = nwp->nw_watermark; 1483 snwp->snw_dispatched = nwp->nw_dispatched; 1484 snwp->snw_hybrid_dispatched = 1485 nwp->nw_hybrid_dispatched; 1486 snwp->snw_qdrops = nwp->nw_qdrops; 1487 snwp->snw_queued = nwp->nw_queued; 1488 snwp->snw_handled = nwp->nw_handled; 1489 counter++; 1490 } 1491 NWS_UNLOCK(nwsp); 1492 } 1493 KASSERT(counter <= MAXCPU * NETISR_MAXPROT, 1494 ("sysctl_netisr_work: counter too big (%d)", counter)); 1495 NETISR_RUNLOCK(&tracker); 1496 error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter); 1497 free(snw_array, M_TEMP); 1498 return (error); 1499 } 1500 1501 SYSCTL_PROC(_net_isr, OID_AUTO, work, 1502 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work, 1503 "S,sysctl_netisr_work", 1504 "Return list of per-workstream, per-protocol work in netisr"); 1505 1506 #ifdef DDB 1507 DB_SHOW_COMMAND(netisr, db_show_netisr) 1508 { 1509 struct netisr_workstream *nwsp; 1510 struct netisr_work *nwp; 1511 int first, proto; 1512 u_int cpuid; 1513 1514 db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", 1515 "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); 1516 CPU_FOREACH(cpuid) { 1517 nwsp = DPCPU_ID_PTR(cpuid, nws); 1518 if (nwsp->nws_intr_event == NULL) 1519 continue; 1520 first = 1; 1521 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1522 if (netisr_proto[proto].np_handler == NULL) 1523 continue; 1524 nwp = &nwsp->nws_work[proto]; 1525 if (first) { 1526 db_printf("%3d ", cpuid); 1527 first = 0; 1528 } else 1529 db_printf("%3s ", ""); 1530 db_printf( 1531 "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", 1532 netisr_proto[proto].np_name, nwp->nw_len, 1533 nwp->nw_watermark, nwp->nw_qlimit, 1534 nwp->nw_dispatched, nwp->nw_hybrid_dispatched, 
1535 nwp->nw_qdrops, nwp->nw_queued); 1536 } 1537 } 1538 } 1539 #endif 1540